diff options
Diffstat (limited to 'src/pip/_internal/operations/prepare.py')
-rw-r--r-- | src/pip/_internal/operations/prepare.py | 462 |
1 file changed, 236 insertions, 226 deletions
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index 8bb5a6843..4bf414cb0 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -8,10 +8,9 @@ import logging import mimetypes import os import shutil -from typing import Dict, Iterable, List, Optional, Tuple +from typing import Dict, Iterable, List, Optional from pip._vendor.packaging.utils import canonicalize_name -from pip._vendor.pkg_resources import Distribution from pip._internal.distributions import make_distribution_for_install_requirement from pip._internal.distributions.installed import InstalledDistribution @@ -20,11 +19,14 @@ from pip._internal.exceptions import ( HashMismatch, HashUnpinned, InstallationError, + MetadataInconsistent, NetworkConnectionError, PreviousBuildDirError, VcsHashUnsupported, ) from pip._internal.index.package_finder import PackageFinder +from pip._internal.metadata import BaseDistribution, get_metadata_distribution +from pip._internal.models.direct_url import ArchiveInfo from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel from pip._internal.network.download import BatchDownloader, Downloader @@ -33,13 +35,20 @@ from pip._internal.network.lazy_wheel import ( dist_from_wheel_url, ) from pip._internal.network.session import PipSession +from pip._internal.operations.build.build_tracker import BuildTracker from pip._internal.req.req_install import InstallRequirement -from pip._internal.req.req_tracker import RequirementTracker -from pip._internal.utils.deprecation import deprecated -from pip._internal.utils.filesystem import copy2_fixed +from pip._internal.utils.direct_url_helpers import ( + direct_url_for_editable, + direct_url_from_link, +) from pip._internal.utils.hashes import Hashes, MissingHashes from pip._internal.utils.logging import indent_log -from pip._internal.utils.misc import display_path, hide_url, is_installable_dir, rmtree +from 
pip._internal.utils.misc import ( + display_path, + hash_file, + hide_url, + is_installable_dir, +) from pip._internal.utils.temp_dir import TempDirectory from pip._internal.utils.unpacking import unpack_file from pip._internal.vcs import vcs @@ -48,30 +57,29 @@ logger = logging.getLogger(__name__) def _get_prepared_distribution( - req, # type: InstallRequirement - req_tracker, # type: RequirementTracker - finder, # type: PackageFinder - build_isolation, # type: bool -): - # type: (...) -> Distribution + req: InstallRequirement, + build_tracker: BuildTracker, + finder: PackageFinder, + build_isolation: bool, + check_build_deps: bool, +) -> BaseDistribution: """Prepare a distribution for installation.""" abstract_dist = make_distribution_for_install_requirement(req) - with req_tracker.track(req): - abstract_dist.prepare_distribution_metadata(finder, build_isolation) - return abstract_dist.get_pkg_resources_distribution() + with build_tracker.track(req): + abstract_dist.prepare_distribution_metadata( + finder, build_isolation, check_build_deps + ) + return abstract_dist.get_metadata_distribution() -def unpack_vcs_link(link, location): - # type: (Link, str) -> None +def unpack_vcs_link(link: Link, location: str, verbosity: int) -> None: vcs_backend = vcs.get_backend_for_scheme(link.scheme) assert vcs_backend is not None - vcs_backend.unpack(location, url=hide_url(link.url)) + vcs_backend.unpack(location, url=hide_url(link.url), verbosity=verbosity) class File: - - def __init__(self, path, content_type): - # type: (str, Optional[str]) -> None + def __init__(self, path: str, content_type: Optional[str]) -> None: self.path = path if content_type is None: self.content_type = mimetypes.guess_type(path)[0] @@ -80,19 +88,16 @@ class File: def get_http_url( - link, # type: Link - download, # type: Downloader - download_dir=None, # type: Optional[str] - hashes=None, # type: Optional[Hashes] -): - # type: (...) 
-> File + link: Link, + download: Downloader, + download_dir: Optional[str] = None, + hashes: Optional[Hashes] = None, +) -> File: temp_dir = TempDirectory(kind="unpack", globally_managed=True) # If a download dir is specified, is the file already downloaded there? already_downloaded_path = None if download_dir: - already_downloaded_path = _check_download_dir( - link, download_dir, hashes - ) + already_downloaded_path = _check_download_dir(link, download_dir, hashes) if already_downloaded_path: from_path = already_downloaded_path @@ -106,72 +111,14 @@ def get_http_url( return File(from_path, content_type) -def _copy2_ignoring_special_files(src, dest): - # type: (str, str) -> None - """Copying special files is not supported, but as a convenience to users - we skip errors copying them. This supports tools that may create e.g. - socket files in the project source directory. - """ - try: - copy2_fixed(src, dest) - except shutil.SpecialFileError as e: - # SpecialFileError may be raised due to either the source or - # destination. If the destination was the cause then we would actually - # care, but since the destination directory is deleted prior to - # copy we ignore all of them assuming it is caused by the source. - logger.warning( - "Ignoring special file error '%s' encountered copying %s to %s.", - str(e), - src, - dest, - ) - - -def _copy_source_tree(source, target): - # type: (str, str) -> None - target_abspath = os.path.abspath(target) - target_basename = os.path.basename(target_abspath) - target_dirname = os.path.dirname(target_abspath) - - def ignore(d, names): - # type: (str, List[str]) -> List[str] - skipped = [] # type: List[str] - if d == source: - # Pulling in those directories can potentially be very slow, - # exclude the following directories if they appear in the top - # level dir (and only it). 
- # See discussion at https://github.com/pypa/pip/pull/6770 - skipped += ['.tox', '.nox'] - if os.path.abspath(d) == target_dirname: - # Prevent an infinite recursion if the target is in source. - # This can happen when TMPDIR is set to ${PWD}/... - # and we copy PWD to TMPDIR. - skipped += [target_basename] - return skipped - - shutil.copytree( - source, - target, - ignore=ignore, - symlinks=True, - copy_function=_copy2_ignoring_special_files, - ) - - def get_file_url( - link, # type: Link - download_dir=None, # type: Optional[str] - hashes=None # type: Optional[Hashes] -): - # type: (...) -> File - """Get file and optionally check its hash. - """ + link: Link, download_dir: Optional[str] = None, hashes: Optional[Hashes] = None +) -> File: + """Get file and optionally check its hash.""" # If a download dir is specified, is the file already there and valid? already_downloaded_path = None if download_dir: - already_downloaded_path = _check_download_dir( - link, download_dir, hashes - ) + already_downloaded_path = _check_download_dir(link, download_dir, hashes) if already_downloaded_path: from_path = already_downloaded_path @@ -189,13 +136,13 @@ def get_file_url( def unpack_url( - link, # type: Link - location, # type: str - download, # type: Downloader - download_dir=None, # type: Optional[str] - hashes=None, # type: Optional[Hashes] -): - # type: (...) -> Optional[File] + link: Link, + location: str, + download: Downloader, + verbosity: int, + download_dir: Optional[str] = None, + hashes: Optional[Hashes] = None, +) -> Optional[File]: """Unpack link into location, downloading if required. 
:param hashes: A Hashes object, one of whose embedded hashes must match, @@ -205,32 +152,10 @@ def unpack_url( """ # non-editable vcs urls if link.is_vcs: - unpack_vcs_link(link, location) + unpack_vcs_link(link, location, verbosity=verbosity) return None - # Once out-of-tree-builds are no longer supported, could potentially - # replace the below condition with `assert not link.is_existing_dir` - # - unpack_url does not need to be called for in-tree-builds. - # - # As further cleanup, _copy_source_tree and accompanying tests can - # be removed. - if link.is_existing_dir(): - deprecated( - reason=( - "pip copied the source tree into a temporary directory " - "before building it. This is changing so that packages " - "are built in-place " - 'within the original source tree ("in-tree build").' - ), - replacement=None, - gone_in="21.3", - feature_flag="in-tree-build", - issue=7555, - ) - if os.path.isdir(location): - rmtree(location) - _copy_source_tree(link.file_path, location) - return None + assert not link.is_existing_dir() # file urls if link.is_file: @@ -253,10 +178,11 @@ def unpack_url( return file -def _check_download_dir(link, download_dir, hashes): - # type: (Link, str, Optional[Hashes]) -> Optional[str] - """ Check download_dir for previously downloaded file with correct hash - If a correct file is found return its path else None +def _check_download_dir( + link: Link, download_dir: str, hashes: Optional[Hashes] +) -> Optional[str]: + """Check download_dir for previously downloaded file with correct hash + If a correct file is found return its path else None """ download_path = os.path.join(download_dir, link.filename) @@ -264,15 +190,14 @@ def _check_download_dir(link, download_dir, hashes): return None # If already downloaded, does its hash match? 
- logger.info('File was already downloaded %s', download_path) + logger.info("File was already downloaded %s", download_path) if hashes: try: hashes.check_against_path(download_path) except HashMismatch: logger.warning( - 'Previously-downloaded file %s has bad hash. ' - 'Re-downloading.', - download_path + "Previously-downloaded file %s has bad hash. Re-downloading.", + download_path, ) os.unlink(download_path) return None @@ -280,30 +205,29 @@ def _check_download_dir(link, download_dir, hashes): class RequirementPreparer: - """Prepares a Requirement - """ + """Prepares a Requirement""" def __init__( self, - build_dir, # type: str - download_dir, # type: Optional[str] - src_dir, # type: str - build_isolation, # type: bool - req_tracker, # type: RequirementTracker - session, # type: PipSession - progress_bar, # type: str - finder, # type: PackageFinder - require_hashes, # type: bool - use_user_site, # type: bool - lazy_wheel, # type: bool - in_tree_build, # type: bool - ): - # type: (...) -> None + build_dir: str, + download_dir: Optional[str], + src_dir: str, + build_isolation: bool, + check_build_deps: bool, + build_tracker: BuildTracker, + session: PipSession, + progress_bar: str, + finder: PackageFinder, + require_hashes: bool, + use_user_site: bool, + lazy_wheel: bool, + verbosity: int, + ) -> None: super().__init__() self.src_dir = src_dir self.build_dir = build_dir - self.req_tracker = req_tracker + self.build_tracker = build_tracker self._session = session self._download = Downloader(session, progress_bar) self._batch_download = BatchDownloader(session, progress_bar) @@ -316,6 +240,9 @@ class RequirementPreparer: # Is build isolation allowed? self.build_isolation = build_isolation + # Should check build dependencies? + self.check_build_deps = check_build_deps + # Should hash-checking be required? self.require_hashes = require_hashes @@ -325,17 +252,16 @@ class RequirementPreparer: # Should wheels be downloaded lazily? 
self.use_lazy_wheel = lazy_wheel - # Should in-tree builds be used for local paths? - self.in_tree_build = in_tree_build + # How verbose should underlying tooling be? + self.verbosity = verbosity - # Memoized downloaded files, as mapping of url: (path, mime type) - self._downloaded = {} # type: Dict[str, Tuple[str, str]] + # Memoized downloaded files, as mapping of url: path. + self._downloaded: Dict[str, str] = {} # Previous "header" printed for a link-based InstallRequirement self._previous_requirement_header = ("", "") - def _log_preparing_link(self, req): - # type: (InstallRequirement) -> None + def _log_preparing_link(self, req: InstallRequirement) -> None: """Provide context for the requirement being prepared.""" if req.link.is_file and not req.original_link_is_in_wheel_cache: message = "Processing %s" @@ -352,8 +278,9 @@ class RequirementPreparer: with indent_log(): logger.info("Using cached %s", req.link.filename) - def _ensure_link_req_src_dir(self, req, parallel_builds): - # type: (InstallRequirement, bool) -> None + def _ensure_link_req_src_dir( + self, req: InstallRequirement, parallel_builds: bool + ) -> None: """Ensure source_dir of a linked InstallRequirement.""" # Since source_dir is only set for editable requirements. if req.link.is_wheel: @@ -361,7 +288,7 @@ class RequirementPreparer: # directory. return assert req.source_dir is None - if req.link.is_existing_dir() and self.in_tree_build: + if req.link.is_existing_dir(): # build local directories in-tree req.source_dir = req.link.file_path return @@ -378,6 +305,7 @@ class RequirementPreparer: # installation. 
# FIXME: this won't upgrade when there's an existing # package unpacked in `req.source_dir` + # TODO: this check is now probably dead code if is_installable_dir(req.source_dir): raise PreviousBuildDirError( "pip can't proceed with requirements '{}' due to a" @@ -387,8 +315,7 @@ class RequirementPreparer: "Please delete it and try again.".format(req, req.source_dir) ) - def _get_linked_req_hashes(self, req): - # type: (InstallRequirement) -> Hashes + def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes: # By the time this is called, the requirement's link should have # been checked so we can tell what kind of requirements req is # and raise some more informative errors than otherwise. @@ -420,18 +347,72 @@ class RequirementPreparer: # showing the user what the hash should be. return req.hashes(trust_internet=False) or MissingHashes() - def _fetch_metadata_using_lazy_wheel(self, link): - # type: (Link) -> Optional[Distribution] + def _fetch_metadata_only( + self, + req: InstallRequirement, + ) -> Optional[BaseDistribution]: + if self.require_hashes: + logger.debug( + "Metadata-only fetching is not used as hash checking is required", + ) + return None + # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable. + return self._fetch_metadata_using_link_data_attr( + req + ) or self._fetch_metadata_using_lazy_wheel(req.link) + + def _fetch_metadata_using_link_data_attr( + self, + req: InstallRequirement, + ) -> Optional[BaseDistribution]: + """Fetch metadata from the data-dist-info-metadata attribute, if possible.""" + # (1) Get the link to the metadata file, if provided by the backend. + metadata_link = req.link.metadata_link() + if metadata_link is None: + return None + assert req.req is not None + logger.info( + "Obtaining dependency information for %s from %s", + req.req, + metadata_link, + ) + # (2) Download the contents of the METADATA file, separate from the dist itself. 
+ metadata_file = get_http_url( + metadata_link, + self._download, + hashes=metadata_link.as_hashes(), + ) + with open(metadata_file.path, "rb") as f: + metadata_contents = f.read() + # (3) Generate a dist just from those file contents. + metadata_dist = get_metadata_distribution( + metadata_contents, + req.link.filename, + req.req.name, + ) + # (4) Ensure the Name: field from the METADATA file matches the name from the + # install requirement. + # + # NB: raw_name will fall back to the name from the install requirement if + # the Name: field is not present, but it's noted in the raw_name docstring + # that that should NEVER happen anyway. + if metadata_dist.raw_name != req.req.name: + raise MetadataInconsistent( + req, "Name", req.req.name, metadata_dist.raw_name + ) + return metadata_dist + + def _fetch_metadata_using_lazy_wheel( + self, + link: Link, + ) -> Optional[BaseDistribution]: """Fetch metadata using lazy wheel, if possible.""" + # --use-feature=fast-deps must be provided. if not self.use_lazy_wheel: return None - if self.require_hashes: - logger.debug('Lazy wheel is not used as hash checking is required') - return None if link.is_file or not link.is_wheel: logger.debug( - 'Lazy wheel is not used as ' - '%r does not points to a remote wheel', + "Lazy wheel is not used as %r does not point to a remote wheel", link, ) return None @@ -439,22 +420,22 @@ class RequirementPreparer: wheel = Wheel(link.filename) name = canonicalize_name(wheel.name) logger.info( - 'Obtaining dependency information from %s %s', - name, wheel.version, + "Obtaining dependency information from %s %s", + name, + wheel.version, ) - url = link.url.split('#', 1)[0] + url = link.url.split("#", 1)[0] try: return dist_from_wheel_url(name, url, self._session) except HTTPRangeRequestUnsupported: - logger.debug('%s does not support range requests', url) + logger.debug("%s does not support range requests", url) return None def _complete_partial_requirements( self, - partially_downloaded_reqs, # 
type: Iterable[InstallRequirement] - parallel_builds=False, # type: bool - ): - # type: (...) -> None + partially_downloaded_reqs: Iterable[InstallRequirement], + parallel_builds: bool = False, + ) -> None: """Download any requirements which were only fetched by metadata.""" # Download to a temporary directory. These will be copied over as # needed for downstream 'download', 'wheel', and 'install' commands. @@ -463,7 +444,7 @@ class RequirementPreparer: # Map each link to the requirement that owns it. This allows us to set # `req.local_file_path` on the appropriate requirement after passing # all the links at once into BatchDownloader. - links_to_fully_download = {} # type: Dict[Link, InstallRequirement] + links_to_fully_download: Dict[Link, InstallRequirement] = {} for req in partially_downloaded_reqs: assert req.link links_to_fully_download[req.link] = req @@ -482,35 +463,36 @@ class RequirementPreparer: for req in partially_downloaded_reqs: self._prepare_linked_requirement(req, parallel_builds) - def prepare_linked_requirement(self, req, parallel_builds=False): - # type: (InstallRequirement, bool) -> Distribution + def prepare_linked_requirement( + self, req: InstallRequirement, parallel_builds: bool = False + ) -> BaseDistribution: """Prepare a requirement to be obtained from req.link.""" assert req.link - link = req.link self._log_preparing_link(req) with indent_log(): # Check if the relevant file is already available # in the download directory file_path = None - if self.download_dir is not None and link.is_wheel: + if self.download_dir is not None and req.link.is_wheel: hashes = self._get_linked_req_hashes(req) file_path = _check_download_dir(req.link, self.download_dir, hashes) if file_path is not None: # The file is already available, so mark it as downloaded - self._downloaded[req.link.url] = file_path, None + self._downloaded[req.link.url] = file_path else: # The file is not available, attempt to fetch only metadata - wheel_dist = 
self._fetch_metadata_using_lazy_wheel(link) - if wheel_dist is not None: + metadata_dist = self._fetch_metadata_only(req) + if metadata_dist is not None: req.needs_more_preparation = True - return wheel_dist + return metadata_dist # None of the optimizations worked, fully prepare the requirement return self._prepare_linked_requirement(req, parallel_builds) - def prepare_linked_requirements_more(self, reqs, parallel_builds=False): - # type: (Iterable[InstallRequirement], bool) -> None + def prepare_linked_requirements_more( + self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False + ) -> None: """Prepare linked requirements more, if needed.""" reqs = [req for req in reqs if req.needs_more_preparation] for req in reqs: @@ -519,12 +501,12 @@ class RequirementPreparer: hashes = self._get_linked_req_hashes(req) file_path = _check_download_dir(req.link, self.download_dir, hashes) if file_path is not None: - self._downloaded[req.link.url] = file_path, None + self._downloaded[req.link.url] = file_path req.needs_more_preparation = False # Prepare requirements we found were already downloaded for some # reason. The other downloads will be completed separately. - partially_downloaded_reqs = [] # type: List[InstallRequirement] + partially_downloaded_reqs: List[InstallRequirement] = [] for req in reqs: if req.needs_more_preparation: partially_downloaded_reqs.append(req) @@ -534,35 +516,58 @@ class RequirementPreparer: # TODO: separate this part out from RequirementPreparer when the v1 # resolver can be removed! 
self._complete_partial_requirements( - partially_downloaded_reqs, parallel_builds=parallel_builds, + partially_downloaded_reqs, + parallel_builds=parallel_builds, ) - def _prepare_linked_requirement(self, req, parallel_builds): - # type: (InstallRequirement, bool) -> Distribution + def _prepare_linked_requirement( + self, req: InstallRequirement, parallel_builds: bool + ) -> BaseDistribution: assert req.link link = req.link self._ensure_link_req_src_dir(req, parallel_builds) hashes = self._get_linked_req_hashes(req) - if link.is_existing_dir() and self.in_tree_build: + if link.is_existing_dir(): local_file = None elif link.url not in self._downloaded: try: local_file = unpack_url( - link, req.source_dir, self._download, - self.download_dir, hashes + link, + req.source_dir, + self._download, + self.verbosity, + self.download_dir, + hashes, ) except NetworkConnectionError as exc: raise InstallationError( - 'Could not install requirement {} because of HTTP ' - 'error {} for URL {}'.format(req, exc, link) + "Could not install requirement {} because of HTTP " + "error {} for URL {}".format(req, exc, link) ) else: - file_path, content_type = self._downloaded[link.url] + file_path = self._downloaded[link.url] if hashes: hashes.check_against_path(file_path) - local_file = File(file_path, content_type) + local_file = File(file_path, content_type=None) + + # If download_info is set, we got it from the wheel cache. + if req.download_info is None: + # Editables don't go through this function (see + # prepare_editable_requirement). + assert not req.editable + req.download_info = direct_url_from_link(link, req.source_dir) + # Make sure we have a hash in download_info. If we got it as part of the + # URL, it will have been verified and we can rely on it. Otherwise we + # compute it from the downloaded file. 
+ if ( + isinstance(req.download_info.info, ArchiveInfo) + and not req.download_info.info.hash + and local_file + ): + hash = hash_file(local_file.path)[0].hexdigest() + req.download_info.info.hash = f"sha256={hash}" # For use in later processing, # preserve the file path on the requirement. @@ -570,12 +575,15 @@ class RequirementPreparer: req.local_file_path = local_file.path dist = _get_prepared_distribution( - req, self.req_tracker, self.finder, self.build_isolation, + req, + self.build_tracker, + self.finder, + self.build_isolation, + self.check_build_deps, ) return dist - def save_linked_requirement(self, req): - # type: (InstallRequirement) -> None + def save_linked_requirement(self, req: InstallRequirement) -> None: assert self.download_dir is not None assert req.link is not None link = req.link @@ -586,8 +594,9 @@ class RequirementPreparer: if link.is_existing_dir(): logger.debug( - 'Not copying link to destination directory ' - 'since it is a directory: %s', link, + "Not copying link to destination directory " + "since it is a directory: %s", + link, ) return if req.local_file_path is None: @@ -598,31 +607,35 @@ class RequirementPreparer: if not os.path.exists(download_location): shutil.copy(req.local_file_path, download_location) download_path = display_path(download_location) - logger.info('Saved %s', download_path) + logger.info("Saved %s", download_path) def prepare_editable_requirement( self, - req, # type: InstallRequirement - ): - # type: (...) 
-> Distribution - """Prepare an editable requirement - """ + req: InstallRequirement, + ) -> BaseDistribution: + """Prepare an editable requirement.""" assert req.editable, "cannot prepare a non-editable req as editable" - logger.info('Obtaining %s', req) + logger.info("Obtaining %s", req) with indent_log(): if self.require_hashes: raise InstallationError( - 'The editable requirement {} cannot be installed when ' - 'requiring hashes, because there is no single file to ' - 'hash.'.format(req) + "The editable requirement {} cannot be installed when " + "requiring hashes, because there is no single file to " + "hash.".format(req) ) req.ensure_has_source_dir(self.src_dir) req.update_editable() + assert req.source_dir + req.download_info = direct_url_for_editable(req.unpacked_source_directory) dist = _get_prepared_distribution( - req, self.req_tracker, self.finder, self.build_isolation, + req, + self.build_tracker, + self.finder, + self.build_isolation, + self.check_build_deps, ) req.check_if_exists(self.use_user_site) @@ -631,27 +644,24 @@ class RequirementPreparer: def prepare_installed_requirement( self, - req, # type: InstallRequirement - skip_reason # type: str - ): - # type: (...) -> Distribution - """Prepare an already-installed requirement - """ + req: InstallRequirement, + skip_reason: str, + ) -> BaseDistribution: + """Prepare an already-installed requirement.""" assert req.satisfied_by, "req should have been satisfied but isn't" assert skip_reason is not None, ( "did not get skip reason skipped but req.satisfied_by " "is set to {}".format(req.satisfied_by) ) logger.info( - 'Requirement %s: %s (%s)', - skip_reason, req, req.satisfied_by.version + "Requirement %s: %s (%s)", skip_reason, req, req.satisfied_by.version ) with indent_log(): if self.require_hashes: logger.debug( - 'Since it is already installed, we are trusting this ' - 'package without checking its hash. To ensure a ' - 'completely repeatable environment, install into an ' - 'empty virtualenv.' 
+ "Since it is already installed, we are trusting this " + "package without checking its hash. To ensure a " + "completely repeatable environment, install into an " + "empty virtualenv." ) - return InstalledDistribution(req).get_pkg_resources_distribution() + return InstalledDistribution(req).get_metadata_distribution() |