From a3667ba65a1ff7959da4cb954faa46a57e42c3d8 Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Fri, 15 Jul 2022 11:17:37 +0100 Subject: Add boolean flag that allows scm_find_files to bypass SCM listing --- src/setuptools_scm/file_finder.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/setuptools_scm/file_finder.py b/src/setuptools_scm/file_finder.py index 99da792..f14a946 100644 --- a/src/setuptools_scm/file_finder.py +++ b/src/setuptools_scm/file_finder.py @@ -11,7 +11,10 @@ from .utils import trace def scm_find_files( - path: _t.PathT, scm_files: set[str], scm_dirs: set[str] + path: _t.PathT, + scm_files: set[str], + scm_dirs: set[str], + force_all_files: bool = False, ) -> list[str]: """ setuptools compatible file finder that follows symlinks @@ -20,6 +23,7 @@ def scm_find_files( (including symlinks to directories) - scm_dirs: set of scm controlled directories (including directories containing no scm controlled files) + - force_all_files: ignore ``scm_files`` and ``scm_dirs`` and list everything. 
scm_files and scm_dirs must be absolute with symlinks resolved (realpath), with normalized case (normcase) @@ -38,7 +42,7 @@ def scm_find_files( fn = os.path.join(realdirpath, os.path.normcase(n)) return os.path.islink(fn) and fn not in scm_files - if realdirpath not in scm_dirs: + if not force_all_files and realdirpath not in scm_dirs: # directory not in scm, don't walk it's content dirnames[:] = [] continue @@ -54,13 +58,16 @@ def scm_find_files( # symlink loop protection dirnames[:] = [] continue - dirnames[:] = [dn for dn in dirnames if not _link_not_in_scm(dn)] + dirnames[:] = [ + dn for dn in dirnames if force_all_files or not _link_not_in_scm(dn) + ] for filename in filenames: - if _link_not_in_scm(filename): + if not force_all_files and _link_not_in_scm(filename): continue # dirpath + filename with symlinks preserved fullfilename = os.path.join(dirpath, filename) - if os.path.normcase(os.path.realpath(fullfilename)) in scm_files: + is_tracked = os.path.normcase(os.path.realpath(fullfilename)) in scm_files + if force_all_files or is_tracked: res.append(os.path.join(path, os.path.relpath(fullfilename, realpath))) seen.add(realdirpath) return res -- cgit v1.2.1 From 36f62f707c22843305f9d25900c5413b7e1beb2c Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Fri, 15 Jul 2022 11:19:39 +0100 Subject: Implement fallback file finders for git/hg archives --- src/setuptools_scm/file_finder_git.py | 17 +++++++++++++++++ src/setuptools_scm/file_finder_hg.py | 21 +++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/src/setuptools_scm/file_finder_git.py b/src/setuptools_scm/file_finder_git.py index a83af21..77ed6df 100644 --- a/src/setuptools_scm/file_finder_git.py +++ b/src/setuptools_scm/file_finder_git.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING from .file_finder import is_toplevel_acceptable from .file_finder import scm_find_files +from .utils import data_from_mime from .utils import do_ex from .utils import trace @@ -101,3 +102,19 @@ def 
git_find_files(path: _t.PathT = "") -> list[str]: trace("toplevel mismatch", toplevel, fullpath) git_files, git_dirs = _git_ls_files_and_dirs(toplevel) return scm_find_files(path, git_files, git_dirs) + + +def git_archive_find_files(path: _t.PathT = "") -> list[str]: + # This function assumes that ``path`` is obtained from a git archive + # and therefore all the files that should be ignored were already removed. + archival = os.path.join(path, ".git_archival.txt") + if not os.path.exists(archival): + return [] + + data = data_from_mime(archival) + + if "$Format" in data.get("node", ""): + # Substitutions have not been performed, so not a reliable archive + return [] + + return scm_find_files(path, set(), set(), force_all_files=True) diff --git a/src/setuptools_scm/file_finder_hg.py b/src/setuptools_scm/file_finder_hg.py index 4f5e3ec..cb09ff0 100644 --- a/src/setuptools_scm/file_finder_hg.py +++ b/src/setuptools_scm/file_finder_hg.py @@ -2,11 +2,16 @@ from __future__ import annotations import os import subprocess +from typing import TYPE_CHECKING from .file_finder import is_toplevel_acceptable from .file_finder import scm_find_files +from .utils import data_from_mime from .utils import do_ex +if TYPE_CHECKING: + from . import _types as _t + def _hg_toplevel(path: str) -> str | None: try: @@ -49,3 +54,19 @@ def hg_find_files(path: str = "") -> list[str]: assert toplevel is not None hg_files, hg_dirs = _hg_ls_files_and_dirs(toplevel) return scm_find_files(path, hg_files, hg_dirs) + + +def hg_archive_find_files(path: _t.PathT = "") -> list[str]: + # This function assumes that ``path`` is obtained from a mercurial archive + # and therefore all the files that should be ignored were already removed. 
+ archival = os.path.join(path, ".hg_archival.txt") + if not os.path.exists(archival): + return [] + + data = data_from_mime(archival) + + if "node" not in data: + # Ensure file is valid + return [] + + return scm_find_files(path, set(), set(), force_all_files=True) -- cgit v1.2.1 From 56a3772d44b8931e65b78460649514695469a9ed Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Fri, 15 Jul 2022 11:21:43 +0100 Subject: Add integrations and tests for fallback file finders --- setup.cfg | 3 +++ src/setuptools_scm/integration.py | 6 +++++- testing/test_file_finder.py | 24 ++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index e240496..06c51ea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -50,6 +50,9 @@ setuptools.finalize_distribution_options = setuptools_scm.files_command = .hg = setuptools_scm.file_finder_hg:hg_find_files .git = setuptools_scm.file_finder_git:git_find_files +setuptools_scm.files_command_fallback = + .hg_archival.txt = setuptools_scm.file_finder_hg:hg_archive_find_files + .git_archival.txt = setuptools_scm.file_finder_git:git_archive_find_files setuptools_scm.local_scheme = node-and-date = setuptools_scm.version:get_local_node_and_date node-and-timestamp = setuptools_scm.version:get_local_node_and_timestamp diff --git a/src/setuptools_scm/integration.py b/src/setuptools_scm/integration.py index 45c841d..2134ff1 100644 --- a/src/setuptools_scm/integration.py +++ b/src/setuptools_scm/integration.py @@ -1,5 +1,6 @@ from __future__ import annotations +import itertools import os import warnings from typing import Any @@ -91,7 +92,10 @@ def version_keyword( def find_files(path: _t.PathT = "") -> list[str]: - for ep in iter_entry_points("setuptools_scm.files_command"): + for ep in itertools.chain( + iter_entry_points("setuptools_scm.files_command"), + iter_entry_points("setuptools_scm.files_command_fallback"), + ): command = ep.load() if isinstance(command, str): # this technique is deprecated diff 
--git a/testing/test_file_finder.py b/testing/test_file_finder.py index a730434..5b63db8 100644 --- a/testing/test_file_finder.py +++ b/testing/test_file_finder.py @@ -201,3 +201,27 @@ def test_symlink_not_in_scm_while_target_is(inwd: WorkDir) -> None: @pytest.mark.skip_commit def test_not_commited(inwd: WorkDir) -> None: assert find_files() == [] + + +def test_unexpanded_git_archival(wd: WorkDir, monkeypatch: pytest.MonkeyPatch) -> None: + # When substitutions in `.git_archival.txt` are not expanded, files should + # not be automatically listed. + monkeypatch.chdir(wd.cwd) + (wd.cwd / ".git_archival.txt").write_text("node: $Format:%H$", encoding="utf-8") + (wd.cwd / "file1.txt").touch() + assert find_files() == [] + + +@pytest.mark.parametrize("archive_file", (".git_archival.txt", ".hg_archival.txt")) +def test_archive( + wd: WorkDir, monkeypatch: pytest.MonkeyPatch, archive_file: str +) -> None: + # When the archival file contains an expanded node, all files in the + # directory should be listed automatically. 
+ monkeypatch.chdir(wd.cwd) + sha = "a1bda3d984d1a40d7b00ae1d0869354d6d503001" + (wd.cwd / archive_file).write_text(f"node: {sha}", encoding="utf-8") + (wd.cwd / "data").mkdir() + (wd.cwd / "data" / "datafile").touch() + (wd.cwd / "data" / "datalink").symlink_to("data/datafile") + assert set(find_files()) == _sep({archive_file, "data/datafile", "data/datalink"}) -- cgit v1.2.1 From b054dfa03301389c87ac0adaab1946407492690a Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Fri, 15 Jul 2022 12:24:50 +0100 Subject: Add debugging statements --- src/setuptools_scm/file_finder_git.py | 1 + src/setuptools_scm/file_finder_hg.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/setuptools_scm/file_finder_git.py b/src/setuptools_scm/file_finder_git.py index 77ed6df..775c49d 100644 --- a/src/setuptools_scm/file_finder_git.py +++ b/src/setuptools_scm/file_finder_git.py @@ -117,4 +117,5 @@ def git_archive_find_files(path: _t.PathT = "") -> list[str]: # Substitutions have not been performed, so not a reliable archive return [] + trace("git archive detected - fallback to listing all files") return scm_find_files(path, set(), set(), force_all_files=True) diff --git a/src/setuptools_scm/file_finder_hg.py b/src/setuptools_scm/file_finder_hg.py index cb09ff0..2ce974f 100644 --- a/src/setuptools_scm/file_finder_hg.py +++ b/src/setuptools_scm/file_finder_hg.py @@ -8,6 +8,7 @@ from .file_finder import is_toplevel_acceptable from .file_finder import scm_find_files from .utils import data_from_mime from .utils import do_ex +from .utils import trace if TYPE_CHECKING: from . 
import _types as _t @@ -69,4 +70,5 @@ def hg_archive_find_files(path: _t.PathT = "") -> list[str]: # Ensure file is valid return [] + trace("hg archive detected - fallback to listing all files") return scm_find_files(path, set(), set(), force_all_files=True) -- cgit v1.2.1 From ae135860562dd7e11674a27bfb142415d3845f9d Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Fri, 15 Jul 2022 13:11:55 +0100 Subject: Fix tests for windows --- testing/test_file_finder.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/testing/test_file_finder.py b/testing/test_file_finder.py index 5b63db8..6c7f2d8 100644 --- a/testing/test_file_finder.py +++ b/testing/test_file_finder.py @@ -223,5 +223,11 @@ def test_archive( (wd.cwd / archive_file).write_text(f"node: {sha}", encoding="utf-8") (wd.cwd / "data").mkdir() (wd.cwd / "data" / "datafile").touch() - (wd.cwd / "data" / "datalink").symlink_to("data/datafile") + + datalink = wd.cwd / "data" / "datalink" + if sys.platform != "win32": + datalink.symlink_to("data/datafile") + else: + os.link("data/datafile", datalink) + assert set(find_files()) == _sep({archive_file, "data/datafile", "data/datalink"}) -- cgit v1.2.1