From 2e66eb7147ae1e8a799b2276c7340f48f63a7220 Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Mon, 1 Nov 2021 19:47:33 +0000 Subject: Fix 1461: Better loop breaker for `manifest_maker` The inconsistency for the `package_data` configuration in sdists when `include_package_data=True` in #1461 have been causing some problems for the community for a while, as also shown in #2835. As pointed out by [@jaraco](https://github.com/pypa/setuptools/issues/1461#issuecomment-749092366), this was being caused by a mechanism to break the recursion between the `egg_info` and `sdist` commands. In summary the loop is caused by the following behaviour: - the `egg_info` command uses a subclass of `sdist` (`manifest_maker`) to calculate the MANIFEST, - the `sdist` class needs to know the MANIFEST to calculate the data files when `include_package_data=True` Previously, the mechanism to break this loop was to simply ignore the data files in `sdist` when `include_package_data=True`. The approach implemented in this change was to replace this mechanism, by allowing `manifest_maker` to override the `_safe_data_files` method from `sdist`. --- Please notice [an extensive experiment] (https://github.com/abravalheri/experiment-setuptools-package-data) was carried out to investigate the previous confusing behaviour. There is also [a simplified theoretical analysis] (https://github.com/pyscaffold/pyscaffold/pull/535#issuecomment-956296407) comparing the observed behavior in the experiment and the expected one. This analysis point out to the same offender indicated by [@jaraco](https://github.com/pypa/setuptools/issues/1461#issuecomment-749092366) (which is being replaced in this change). --- setuptools/command/build_py.py | 13 +++++++++++++ setuptools/command/egg_info.py | 11 +++++++++++ setuptools/command/sdist.py | 13 ++++++++----- 3 files changed, 32 insertions(+), 5 deletions(-) (limited to 'setuptools/command') diff --git a/setuptools/command/build_py.py b/setuptools/command/build_py.py index 6a615433..f71c5a56 100644 --- a/setuptools/command/build_py.py +++ b/setuptools/command/build_py.py @@ -67,6 +67,19 @@ class build_py(orig.build_py): self.analyze_manifest() return list(map(self._get_pkg_data_files, self.packages or ())) + def get_data_files_without_manifest(self): + """ + Generate list of ``(package,src_dir,build_dir,filenames)`` tuples, + but without triggering any attempt to analyze or build the manifest. + """ + # Avoid triggering dynamic behavior in __getattr__ + if 'data_files' in self.__dict__: + return self.data_files + # Prevent eventual errors from unset `manifest_files` + # (that would otherwise be set by `analyze_manifest`) + self.__dict__.setdefault('manifest_files', {}) + return list(map(self._get_pkg_data_files, self.packages or ())) + def _get_pkg_data_files(self, package): # Locate package source directory src_dir = self.get_package_dir(package) diff --git a/setuptools/command/egg_info.py b/setuptools/command/egg_info.py index 18b81340..ff009861 100644 --- a/setuptools/command/egg_info.py +++ b/setuptools/command/egg_info.py @@ -608,6 +608,17 @@ class manifest_maker(sdist): self.filelist.exclude_pattern(r'(^|' + sep + r')(RCS|CVS|\.svn)' + sep, is_regex=1) + def _safe_data_files(self, build_py): + """ + The parent class implementation of this method (``sdist``) will try to + include data files, which might cause recursion problems, when + ``include_package_data=True``. + + Therefore we have to avoid triggering any attempt of analyzing/building + the manifest again. + """ + return build_py.get_data_files_without_manifest() + def write_file(filename, contents): """Create a file with the specified name and write 'contents' (a diff --git a/setuptools/command/sdist.py b/setuptools/command/sdist.py index e8062f2e..0285b690 100644 --- a/setuptools/command/sdist.py +++ b/setuptools/command/sdist.py @@ -114,12 +114,15 @@ class sdist(sdist_add_defaults, orig.sdist): def _safe_data_files(self, build_py): """ - Extracting data_files from build_py is known to cause - infinite recursion errors when `include_package_data` - is enabled, so suppress it in that case. + Since the ``sdist`` class is also used to compute the MANIFEST + (via :obj:`setuptools.command.egg_info.manifest_maker`), + there might be recursion problems when trying to obtain the list of + data_files and ``include_package_data=True`` (which in turn depends on + the files included in the MANIFEST). + + To avoid that, ``manifest_maker`` should be able to overwrite this + method and avoid recursive attempts to build/analyze the MANIFEST. """ - if self.distribution.include_package_data: - return () return build_py.data_files def _add_data_files(self, data_files): -- cgit v1.2.1