From ecbbebfc0ee769bdc01f8ce1a0cb0868622a0feb Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 07:11:53 +0000 Subject: [PATCH 1/8] fix: exclude submodules outside src folder when src is specified When a subproject specifies a src directory, git submodule update --init would clone ALL submodules (including those outside src), leaving their directories in the destination. _apply_src_and_ignore now detects which submodules are outside src (path unchanged by strip_glob_prefix), removes their top-level directory, and excludes them from the returned list. Two new BDD scenarios cover this: - plain src directory (non-glob) with a submodule inside - mixed submodules inside and outside src, only inside is fetched https://claude.ai/code/session_01KjAnpdH8M33gT1yZPraVx8 --- dfetch/vcs/git.py | 9 ++- .../fetch-git-repo-with-submodule.feature | 65 +++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 9d6806f1f..b976f48f1 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -420,8 +420,15 @@ def _apply_src_and_ignore( ) -> list[Submodule]: """Apply src filter and ignore patterns, returning surviving submodules.""" if src: + within_src = [] for submodule in submodules: - submodule.path = strip_glob_prefix(submodule.path, src) + new_path = strip_glob_prefix(submodule.path, src) + if new_path != submodule.path: + submodule.path = new_path + within_src.append(submodule) + else: + safe_rm(Path(submodule.path).parts[0], within=".") + submodules = within_src self._move_src_folder_up(remote, src) for ignore_path in ignore or []: diff --git a/features/fetch-git-repo-with-submodule.feature b/features/fetch-git-repo-with-submodule.feature index 8cd3eb187..54ef91da4 100644 --- a/features/fetch-git-repo-with-submodule.feature +++ b/features/fetch-git-repo-with-submodule.feature @@ -154,3 +154,68 @@ Feature: Fetch projects with nested VCS dependencies test-repo/ README.md """ + + Scenario: A submodule within a plain src directory is fetched + Given a git-repository "PlainSrcProject.git" with the following submodules + | path | url | revision | + | src_folder/ext/sub-repo | some-remote-server/TestRepo.git | master | + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: plain-src-project + url: some-remote-server/PlainSrcProject.git + src: src_folder + """ + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.13.0) + plain-src-project: + > Found & fetched submodule "./ext/sub-repo" (some-remote-server/TestRepo.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a + """ + Then 'MyProject' looks like: + """ + MyProject/ + dfetch.yaml + plain-src-project/ + .dfetch_data.yaml + ext/ + sub-repo/ + README.md + """ + + Scenario: A submodule outside the src folder is not fetched when src is specified + Given a git-repository "MixedSubmoduleProject.git" with the following submodules + | path | url | revision | + | src_folder/ext/inside | some-remote-server/TestRepo.git | master | + | other_ext/outside | some-remote-server/TestRepo.git | master | + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: mixed-project + url: some-remote-server/MixedSubmoduleProject.git + src: src_folder + """ + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.13.0) + mixed-project: + > Found & fetched submodule "./ext/inside" (some-remote-server/TestRepo.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a + """ + Then 'MyProject' looks like: + """ + MyProject/ + dfetch.yaml + mixed-project/ + .dfetch_data.yaml + ext/ + inside/ + README.md + """ From 3b17eca9da1eee2cac0ef03c39daf434f5386d00 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 07:20:57 +0000 Subject: [PATCH 2/8] refactor: fix xenon and mypy issues found by pre-commit - Extract _filter_submodules_by_src from _apply_src_and_ignore to keep cyclomatic complexity within xenon's block-grade-B limit - Fix mypy strict error in log.py: replace LogRender inheritance with composition to avoid subclassing an untyped (Any) class - Fix mypy strict error in license.py: unpack probable_licenses[0] explicitly so mypy sees exactly two positional args to from_inferred, eliminating the spurious "multiple values for keyword argument 'text'" https://claude.ai/code/session_01KjAnpdH8M33gT1yZPraVx8 --- dfetch/log.py | 11 +++++++++-- dfetch/util/license.py | 9 ++++----- dfetch/vcs/git.py | 26 ++++++++++++++++---------- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/dfetch/log.py b/dfetch/log.py index 340190bd7..8bb3e21fa 100644 --- a/dfetch/log.py +++ b/dfetch/log.py @@ -17,15 +17,22 @@ from dfetch import __version__ -class _NoExpandLogRender(LogRender): # pylint: disable=too-few-public-methods +class _NoExpandLogRender: # pylint: disable=too-few-public-methods """LogRender that disables table expansion to prevent blank lines in asciicasts.""" + def __init__(self, **kwargs: Any) -> None: + self._render: Any = LogRender(**kwargs) + def __call__(self, *args: Any, **kwargs: Any) -> Any: """Render log entry without expanding the table to the full terminal width.""" - table = super().__call__(*args, **kwargs) + table = self._render(*args, **kwargs) table.expand = False return table + def __getattr__(self, name: str) -> Any: + """Forward attribute lookups to the wrapped LogRender instance.""" + return getattr(self._render, name) + def make_console(no_color: bool = False) -> Console: """Create a Rich Console with proper color handling.""" diff --git a/dfetch/util/license.py b/dfetch/util/license.py index 312365334..dc335fc4c 100644 --- a/dfetch/util/license.py +++ b/dfetch/util/license.py @@ -119,8 +119,7 @@ def guess_license_in_file( probable_licenses = infer_license.api.probabilities(license_text) - return ( - None - if not probable_licenses - else License.from_inferred(*probable_licenses[0], text=license_text) - ) + if not probable_licenses: + return None + inferred, probability = probable_licenses[0] + return License.from_inferred(inferred, probability, text=license_text) diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index b976f48f1..cf14a7885 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -420,16 +420,7 @@ def _apply_src_and_ignore( ) -> list[Submodule]: """Apply src filter and ignore patterns, returning surviving submodules.""" if src: - within_src = [] - for submodule in submodules: - new_path = strip_glob_prefix(submodule.path, src) - if new_path != submodule.path: - submodule.path = new_path - within_src.append(submodule) - else: - safe_rm(Path(submodule.path).parts[0], within=".") - submodules = within_src - self._move_src_folder_up(remote, src) + submodules = self._filter_submodules_by_src(remote, src, submodules) for ignore_path in ignore or []: paths = [ @@ -441,6 +432,21 @@ def _apply_src_and_ignore( return [s for s in submodules if os.path.exists(s.path)] + def _filter_submodules_by_src( + self, remote: str, src: str, submodules: list[Submodule] + ) -> list[Submodule]: + """Keep only submodules within *src*, remove others, then promote *src* to root.""" + within_src = [] + for submodule in submodules: + new_path = strip_glob_prefix(submodule.path, src) + if new_path != submodule.path: + submodule.path = new_path + within_src.append(submodule) + else: + safe_rm(Path(submodule.path).parts[0], within=".") + self._move_src_folder_up(remote, src) + return within_src + @staticmethod def _collect_safe_paths(src: str, repo_root: Path, remote: str) -> list[str]: """Return glob-matched paths for *src* that are within *repo_root*. From 4ecbb9521e6c9bef72c440c61d8c61b3b1a4e73a Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 08:39:32 +0000 Subject: [PATCH 3/8] refactor: deduplicate safe_rm calls for outside-src submodule dirs When multiple submodules outside src share the same top-level directory, collect unique top-level dirs first and call safe_rm once per dir instead of once per submodule. https://claude.ai/code/session_01KjAnpdH8M33gT1yZPraVx8 --- dfetch/vcs/git.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index cf14a7885..c1b88f9a0 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -437,13 +437,16 @@ def _filter_submodules_by_src( ) -> list[Submodule]: """Keep only submodules within *src*, remove others, then promote *src* to root.""" within_src = [] + to_remove: set[str] = set() for submodule in submodules: new_path = strip_glob_prefix(submodule.path, src) if new_path != submodule.path: submodule.path = new_path within_src.append(submodule) else: - safe_rm(Path(submodule.path).parts[0], within=".") + to_remove.add(Path(submodule.path).parts[0]) + for top_dir in to_remove: + safe_rm(top_dir, within=".") self._move_src_folder_up(remote, src) return within_src From ec6a1cc8ad2a031e3abd7bff7ceb860faf30b044 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 08:53:19 +0000 Subject: [PATCH 4/8] fix: restore LogRender inheritance to fix CI mypy assignment error Composition broke CI because handler._log_render is typed as LogRender in rich's stubs there, making the assignment incompatible. Revert to inheritance; use type: ignore[misc] to suppress the local-only error where rich._log_render has no stubs and LogRender resolves to Any. warn_unused_ignores=false means the comment is harmless in CI. https://claude.ai/code/session_01KjAnpdH8M33gT1yZPraVx8 --- dfetch/log.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/dfetch/log.py b/dfetch/log.py index 8bb3e21fa..29db4e35e 100644 --- a/dfetch/log.py +++ b/dfetch/log.py @@ -17,22 +17,15 @@ from dfetch import __version__ -class _NoExpandLogRender: # pylint: disable=too-few-public-methods +class _NoExpandLogRender(LogRender): # type: ignore[misc] # pylint: disable=too-few-public-methods """LogRender that disables table expansion to prevent blank lines in asciicasts.""" - def __init__(self, **kwargs: Any) -> None: - self._render: Any = LogRender(**kwargs) - def __call__(self, *args: Any, **kwargs: Any) -> Any: """Render log entry without expanding the table to the full terminal width.""" - table = self._render(*args, **kwargs) + table = super().__call__(*args, **kwargs) table.expand = False return table - def __getattr__(self, name: str) -> Any: - """Forward attribute lookups to the wrapped LogRender instance.""" - return getattr(self._render, name) - def make_console(no_color: bool = False) -> Console: """Create a Rich Console with proper color handling.""" From 36dd88255abd0d84c9d5d1c0f80d2224860e75e9 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 11:44:50 +0000 Subject: [PATCH 5/8] fix: address review comments on submodule filtering and log handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git.py – _filter_submodules_by_src: - Issue 1: submodule whose path exactly equals src was treated as outside-scope because strip_glob_prefix returns unchanged when path depth <= pattern depth. Now checked first; submodule is added to within_src without altering its path so _move_src_folder_up can promote it correctly. - Issue 2: using parts[0] as the ancestor to remove could clobber the src tree when src is nested (e.g. src: apps/myapp, submodule: apps/other would incorrectly remove apps/). Now uses Path(src).is_relative_to() to detect a shared ancestor and falls back to the exact submodule path; the shared ancestor is cleaned up automatically by _move_src_folder_up. log.py – _NoExpandRichHandler: Replace _NoExpandLogRender (which mutated handler._log_render, a protected attribute) with a proper RichHandler subclass that overrides the public render() method to set table.expand = False. configure_root_logger now selects the subclass via handler_class rather than post-construction mutation, removing the protected-access suppression entirely. https://claude.ai/code/session_01KjAnpdH8M33gT1yZPraVx8 --- dfetch/log.py | 55 +++++++++++++++++++++++++++++------------------ dfetch/vcs/git.py | 20 ++++++++++++++--- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/dfetch/log.py b/dfetch/log.py index 29db4e35e..d0136b430 100644 --- a/dfetch/log.py +++ b/dfetch/log.py @@ -1,30 +1,53 @@ """Logging related items.""" +from __future__ import annotations + import logging import os import sys import types from contextlib import nullcontext -from typing import Any, cast +from logging import LogRecord +from typing import TYPE_CHECKING, Any, cast -from rich._log_render import LogRender # type: ignore[import-untyped] -from rich.console import Console +from rich.console import Console, ConsoleRenderable from rich.highlighter import NullHighlighter from rich.logging import RichHandler from rich.markup import escape as markup_escape from rich.status import Status +from rich.table import Table from dfetch import __version__ +if TYPE_CHECKING: + from rich.traceback import Traceback + + +class _NoExpandRichHandler(RichHandler): # type: ignore[misc] # pylint: disable=too-few-public-methods + """RichHandler that disables table expansion to prevent blank lines in asciicasts. -class _NoExpandLogRender(LogRender): # type: ignore[misc] # pylint: disable=too-few-public-methods - """LogRender that disables table expansion to prevent blank lines in asciicasts.""" + Rich's LogRender uses expand=True on its Table.grid, which pads every + log message with trailing spaces to fill the full terminal width. When + asciinema records the output the padded line fills the terminal exactly, + causing the subsequent newline to produce a blank line in the cast + player. Overriding render to set expand=False removes the trailing + spaces and avoids the spurious blank lines. + """ - def __call__(self, *args: Any, **kwargs: Any) -> Any: + def render( + self, + *, + record: LogRecord, + traceback: Traceback | None, + message_renderable: ConsoleRenderable, + ) -> ConsoleRenderable: """Render log entry without expanding the table to the full terminal width.""" - table = super().__call__(*args, **kwargs) - table.expand = False - return table + renderable = super().render( + record=record, traceback=traceback, message_renderable=message_renderable + ) + if isinstance(renderable, Table): + renderable.expand = False + return renderable def make_console(no_color: bool = False) -> Console: @@ -40,7 +63,8 @@ def configure_root_logger(console: Console | None = None) -> None: """Configure the root logger with RichHandler using the provided Console.""" console = console or make_console() - handler = RichHandler( + handler_class = _NoExpandRichHandler if os.getenv("ASCIINEMA_REC") else RichHandler + handler = handler_class( console=console, show_time=False, show_path=False, @@ -50,17 +74,6 @@ def configure_root_logger(console: Console | None = None) -> None: highlighter=NullHighlighter(), ) - if os.getenv("ASCIINEMA_REC"): - # Rich's LogRender uses expand=True on its Table.grid, which pads every - # log message with trailing spaces to fill the full terminal width. When - # asciinema records the output the padded line fills the terminal exactly, - # causing the subsequent newline to produce a blank line in the cast - # player. Wrapping _log_render so it returns a non-expanding table - # removes the trailing spaces and avoids the spurious blank lines. - handler._log_render = _NoExpandLogRender( # pylint: disable=protected-access - show_time=False, show_level=False, show_path=False - ) - logging.basicConfig( level=logging.INFO, format="%(message)s", diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index c1b88f9a0..77a152af9 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -439,14 +439,28 @@ def _filter_submodules_by_src( within_src = [] to_remove: set[str] = set() for submodule in submodules: + if submodule.path == src: + # Submodule IS the src directory itself; keep it in-scope without + # altering its path and let _move_src_folder_up handle promotion. + within_src.append(submodule) + continue new_path = strip_glob_prefix(submodule.path, src) if new_path != submodule.path: submodule.path = new_path within_src.append(submodule) else: - to_remove.add(Path(submodule.path).parts[0]) - for top_dir in to_remove: - safe_rm(top_dir, within=".") + sub_top = Path(submodule.path).parts[0] + # Only remove the top-level component when it is provably disjoint + # from src; if src lives under that same ancestor, removing it would + # clobber the src tree before _move_src_folder_up can promote it. + # In that case use the exact submodule path; the shared ancestor is + # cleaned up automatically by _move_src_folder_up. + if Path(src).is_relative_to(sub_top): + to_remove.add(submodule.path) + else: + to_remove.add(sub_top) + for path in to_remove: + safe_rm(path, within=".") self._move_src_folder_up(remote, src) return within_src From 84c6b7674fb7688c5ff5f3d5ada770583fb41fb4 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 12:18:53 +0000 Subject: [PATCH 6/8] Remove unnecessary pylint suppression from _NoExpandRichHandler; fix submodule ancestor check; add unit tests - log.py: remove `pylint: disable=too-few-public-methods` from _NoExpandRichHandler (it inherits many methods from RichHandler so pylint never flags it); retain `type: ignore[misc]` which mypy requires because rich lacks complete type stubs. - vcs/git.py: in _filter_submodules_by_src replace the fragile two-branch `sub_top` guard with a single `Path(src).is_relative_to(Path(submodule.path))` check. The old code computed the top-level path component separately and could mistakenly remove a top-level submodule whose full path is actually an ancestor of src (e.g. submodule at "apps", src="apps/myapp"). The new code tests the full path and skips removal with `continue`, leaving the ancestor submodule in place so _move_src_folder_up can promote its content correctly. - tests/test_git_vcs.py: add two new unit tests for _filter_submodules_by_src: - test_filter_submodules_ancestor_of_src_not_removed: proves that a submodule at "apps" is not deleted when src="apps/myapp" and that the inner content is promoted to root. - test_filter_submodules_disjoint_submodule_removed: proves that a truly out-of-scope submodule (different top-level dir) is still removed. https://claude.ai/code/session_01KjAnpdH8M33gT1yZPraVx8 --- dfetch/log.py | 2 +- dfetch/vcs/git.py | 13 ++------ tests/test_git_vcs.py | 77 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 11 deletions(-) diff --git a/dfetch/log.py b/dfetch/log.py index d0136b430..aad877426 100644 --- a/dfetch/log.py +++ b/dfetch/log.py @@ -23,7 +23,7 @@ from rich.traceback import Traceback -class _NoExpandRichHandler(RichHandler): # type: ignore[misc] # pylint: disable=too-few-public-methods +class _NoExpandRichHandler(RichHandler): # type: ignore[misc] """RichHandler that disables table expansion to prevent blank lines in asciicasts. Rich's LogRender uses expand=True on its Table.grid, which pads every diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 77a152af9..449b7220d 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -449,16 +449,9 @@ def _filter_submodules_by_src( submodule.path = new_path within_src.append(submodule) else: - sub_top = Path(submodule.path).parts[0] - # Only remove the top-level component when it is provably disjoint - # from src; if src lives under that same ancestor, removing it would - # clobber the src tree before _move_src_folder_up can promote it. - # In that case use the exact submodule path; the shared ancestor is - # cleaned up automatically by _move_src_folder_up. - if Path(src).is_relative_to(sub_top): - to_remove.add(submodule.path) - else: - to_remove.add(sub_top) + if Path(src).is_relative_to(Path(submodule.path)): + continue + to_remove.add(Path(submodule.path).parts[0]) for path in to_remove: safe_rm(path, within=".") self._move_src_folder_up(remote, src) diff --git a/tests/test_git_vcs.py b/tests/test_git_vcs.py index 891d5a0e0..37efc438a 100644 --- a/tests/test_git_vcs.py +++ b/tests/test_git_vcs.py @@ -16,6 +16,7 @@ GitRemote, _build_git_ssh_command, ) +from dfetch.vcs.git_types import Submodule # --------------------------------------------------------------------------- # unique_parent_dirs (dfetch.util.util) @@ -104,6 +105,82 @@ def test_move_src_folder_up_rejects_traversal_src(tmp_path): mock_move.assert_not_called() +# --------------------------------------------------------------------------- +# GitLocalRepo._filter_submodules_by_src +# --------------------------------------------------------------------------- + + +def _make_submodule(path: str, url: str = "some-url") -> Submodule: + return Submodule( + name=path, + toplevel="", + path=path, + sha="abc123", + url=url, + branch="master", + tag="", + ) + + +def test_filter_submodules_ancestor_of_src_not_removed(tmp_path, monkeypatch): + """A submodule whose path is an ancestor of src must not be deleted. + + When src='apps/myapp' and a submodule exists at 'apps', the old logic + incorrectly added 'apps' to to_remove (because Path('apps/myapp').is_relative_to('apps') + was True with sub_top='apps'). The fix checks the full submodule.path so that an + ancestor submodule is skipped and _move_src_folder_up can promote its content. + """ + (tmp_path / "apps" / "myapp").mkdir(parents=True) + (tmp_path / "apps" / "myapp" / "README.md").write_text("content") + (tmp_path / "outside").mkdir() + (tmp_path / "outside" / "file.txt").write_text("content") + + monkeypatch.chdir(tmp_path) + repo = GitLocalRepo(str(tmp_path)) + result = repo._filter_submodules_by_src( + "remote-url", + "apps/myapp", + [_make_submodule("apps"), _make_submodule("outside")], + ) + + assert ( + tmp_path / "README.md" + ).exists(), "apps/myapp content should be promoted to root" + assert not (tmp_path / "outside").exists(), "outside/ submodule should be removed" + assert not any( + s.path == "apps" for s in result + ), "ancestor submodule must not appear in result" + assert not any( + s.path == "outside" for s in result + ), "out-of-scope submodule must not appear in result" + + +def test_filter_submodules_disjoint_submodule_removed(tmp_path, monkeypatch): + """A submodule whose path is disjoint from src must be removed.""" + (tmp_path / "src_folder" / "ext" / "inside").mkdir(parents=True) + (tmp_path / "src_folder" / "ext" / "inside" / "README.md").write_text("content") + (tmp_path / "other_ext" / "outside").mkdir(parents=True) + (tmp_path / "other_ext" / "outside" / "README.md").write_text("content") + + monkeypatch.chdir(tmp_path) + repo = GitLocalRepo(str(tmp_path)) + result = repo._filter_submodules_by_src( + "remote-url", + "src_folder", + [ + _make_submodule("src_folder/ext/inside"), + _make_submodule("other_ext/outside"), + ], + ) + + assert not ( + tmp_path / "other_ext" + ).exists(), "other_ext/ should be removed (outside src)" + assert any( + s.path == "ext/inside" for s in result + ), "inside submodule should be promoted" + + @pytest.mark.parametrize( "name, cmd_result, expectation", [ From fe5bdccf6815d4a27cc53347dfb2890b4beddc56 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 22:06:12 +0000 Subject: [PATCH 7/8] Fix submodule filter to remove exact paths instead of top-level dirs; handle empty parents and .git collision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix used Path(submodule.path).parts[0] in to_remove, which for a sibling submodule (e.g. apps/widget when src=apps/lib) would add the shared top-level dir 'apps' to to_remove and call safe_rm('apps'), destroying the already-cloned apps/lib content before _move_src_folder_up could promote it. Changes: - vcs/git.py: _filter_submodules_by_src now adds the exact submodule.path to to_remove instead of its first path component, so only the precise out-of-scope submodule directory is deleted. - vcs/git.py: add _remove_empty_parents static helper that walks up from each removed path and rmdir()s any ancestor directories that become empty. git submodule update creates parent dirs (e.g. other_ext/) for submodules excluded by sparse-checkout; the old top-level removal cleaned those up for free, so the helper restores that behaviour without the sibling-clobbering side-effect. - vcs/git.py: _apply_move now pre-removes .git and .gitmodules from the root of the chosen directory before promoting its contents to the repo root. When src is itself a cloned submodule (e.g. src: apps/lib), the submodule's .git file would collide with the parent repo's .git directory; removing it beforehand avoids the shutil.Error. The caller (gitsubproject) already cleans up these files recursively after checkout completes. - tests/test_git_vcs.py: update test_filter_submodules_disjoint_submodule_removed to assert on the exact submodule path (not its parent); add new test_filter_submodules_sibling_of_src_not_removed which proves the bug — with the old code safe_rm('apps') would remove apps/lib/ before promotion, so README.md would never reach root. - features/fetch-git-repo-with-submodule.feature: add end-to-end scenario "A sibling submodule at the same top-level dir as src is not fetched" that fails under the old top-level removal logic and passes with the fix. https://claude.ai/code/session_01KjAnpdH8M33gT1yZPraVx8 --- dfetch/vcs/git.py | 27 +++++++++++- .../fetch-git-repo-with-submodule.feature | 30 +++++++++++++ tests/test_git_vcs.py | 44 +++++++++++++++++-- 3 files changed, 97 insertions(+), 4 deletions(-) diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 449b7220d..0bc762b4c 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -451,12 +451,31 @@ def _filter_submodules_by_src( else: if Path(src).is_relative_to(Path(submodule.path)): continue - to_remove.add(Path(submodule.path).parts[0]) + to_remove.add(submodule.path) for path in to_remove: safe_rm(path, within=".") + GitLocalRepo._remove_empty_parents(to_remove) self._move_src_folder_up(remote, src) return within_src + @staticmethod + def _remove_empty_parents(paths: set[str]) -> None: + """Remove empty ancestor directories left after removing out-of-scope submodule dirs. + + git submodule update may create a parent directory for a submodule even when + sparse-checkout excludes it; after safe_rm removes the exact submodule path the + parent can be left as an empty directory. os.rmdir is used because it is atomic + and raises OSError when the directory is not empty, which stops the upward walk. + """ + for path in paths: + parent = Path(path).parent + while parent != Path("."): + try: + parent.rmdir() + except OSError: + break + parent = parent.parent + @staticmethod def _collect_safe_paths(src: str, repo_root: Path, remote: str) -> list[str]: """Return glob-matched paths for *src* that are within *repo_root*. @@ -474,6 +493,12 @@ def _collect_safe_paths(src: str, repo_root: Path, remote: str) -> list[str]: @staticmethod def _apply_move(chosen: Path, repo_root: Path, remote: str) -> None: """Move the contents of *chosen* to the repo root and remove the empty parent.""" + # Pre-remove git metadata at the root of *chosen* before promoting its contents. + # When *chosen* is itself a cloned submodule it contains a .git file that would + # collide with the parent repo's .git directory; the caller cleans these up + # recursively after checkout anyway. + for name in (GitLocalRepo.METADATA_DIR, GitLocalRepo.GIT_MODULES_FILE): + safe_rm(chosen / name, within=chosen) try: move_directory_contents(str(chosen), ".") except FileNotFoundError: diff --git a/features/fetch-git-repo-with-submodule.feature b/features/fetch-git-repo-with-submodule.feature index 54ef91da4..c349a3c6e 100644 --- a/features/fetch-git-repo-with-submodule.feature +++ b/features/fetch-git-repo-with-submodule.feature @@ -219,3 +219,33 @@ Feature: Fetch projects with nested VCS dependencies inside/ README.md """ + + Scenario: A sibling submodule at the same top-level dir as src is not fetched + Given a git-repository "SiblingSubmoduleProject.git" with the following submodules + | path | url | revision | + | apps/lib | some-remote-server/TestRepo.git | master | + | apps/widget | some-remote-server/TestRepo.git | master | + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: sibling-project + url: some-remote-server/SiblingSubmoduleProject.git + src: apps/lib + """ + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.13.0) + sibling-project: + > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a + """ + Then 'MyProject' looks like: + """ + MyProject/ + dfetch.yaml + sibling-project/ + .dfetch_data.yaml + README.md + """ diff --git a/tests/test_git_vcs.py b/tests/test_git_vcs.py index 37efc438a..33b08d149 100644 --- a/tests/test_git_vcs.py +++ b/tests/test_git_vcs.py @@ -156,7 +156,11 @@ def test_filter_submodules_ancestor_of_src_not_removed(tmp_path, monkeypatch): def test_filter_submodules_disjoint_submodule_removed(tmp_path, monkeypatch): - """A submodule whose path is disjoint from src must be removed.""" + """A submodule whose path is disjoint from src must be removed. + + The fix stores the full submodule.path (not its top-level component) in + to_remove, so only the exact submodule directory is deleted. + """ (tmp_path / "src_folder" / "ext" / "inside").mkdir(parents=True) (tmp_path / "src_folder" / "ext" / "inside" / "README.md").write_text("content") (tmp_path / "other_ext" / "outside").mkdir(parents=True) @@ -174,13 +178,47 @@ def test_filter_submodules_disjoint_submodule_removed(tmp_path, monkeypatch): ) assert not ( - tmp_path / "other_ext" - ).exists(), "other_ext/ should be removed (outside src)" + tmp_path / "other_ext" / "outside" + ).exists(), "other_ext/outside submodule dir should be removed" assert any( s.path == "ext/inside" for s in result ), "inside submodule should be promoted" +def test_filter_submodules_sibling_of_src_not_removed(tmp_path, monkeypatch): + """A sibling submodule sharing the same top-level dir as src must not destroy src content. + + When src='apps/lib' and submodules exist at both 'apps/lib' (exact match, the src) + and 'apps/widget' (sibling, outside src), the old logic used parts[0]='apps' and + called safe_rm('apps'), which deleted the entire apps/ directory — including the + already-cloned apps/lib content needed by _move_src_folder_up. + The fix stores the full submodule.path so only apps/widget is targeted, leaving + apps/lib intact for promotion. + """ + (tmp_path / "apps" / "lib").mkdir(parents=True) + (tmp_path / "apps" / "lib" / "README.md").write_text("content") + (tmp_path / "apps" / "widget").mkdir(parents=True) + (tmp_path / "apps" / "widget" / "widget.h").write_text("content") + + monkeypatch.chdir(tmp_path) + repo = GitLocalRepo(str(tmp_path)) + result = repo._filter_submodules_by_src( + "remote-url", + "apps/lib", + [_make_submodule("apps/lib"), _make_submodule("apps/widget")], + ) + + assert ( + tmp_path / "README.md" + ).exists(), "apps/lib content must be promoted to root" + assert not ( + tmp_path / "apps" / "widget" + ).exists(), "sibling apps/widget must be removed" + assert any( + s.path == "apps/lib" for s in result + ), "src submodule should appear in result before final os.path.exists filtering" + + @pytest.mark.parametrize( "name, cmd_result, expectation", [ From 3ee3ffb7623d5ade8b651872a4e3f48a2fa205c3 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 22:31:29 +0000 Subject: [PATCH 8/8] Remove inline type: ignore[misc] from _NoExpandRichHandler The pre-commit mypy binary runs in an isolated uv-tool venv that lacks rich, so RichHandler resolved to Any and triggered [misc] on subclassing. Move the suppression to a targeted per-module mypy override (disallow_subclassing_any = false for dfetch.log) so the inline comment is gone and the setting is explicit in configuration. https://claude.ai/code/session_01KjAnpdH8M33gT1yZPraVx8 --- dfetch/log.py | 2 +- pyproject.toml | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/dfetch/log.py b/dfetch/log.py index aad877426..8dc83dd40 100644 --- a/dfetch/log.py +++ b/dfetch/log.py @@ -23,7 +23,7 @@ from rich.traceback import Traceback -class _NoExpandRichHandler(RichHandler): # type: ignore[misc] +class _NoExpandRichHandler(RichHandler): """RichHandler that disables table expansion to prevent blank lines in asciicasts. Rich's LogRender uses expand=True on its Table.grid, which pads every diff --git a/pyproject.toml b/pyproject.toml index 55a4d287b..e3f038f40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -145,6 +145,13 @@ ignore_missing_imports = true strict = true warn_unused_ignores = false +[[tool.mypy.overrides]] +module = "dfetch.log" +# RichHandler is untyped in the pre-commit mypy environment (rich not installed there), +# so mypy sees it as Any and raises [misc] when subclassing. Disabling this check for +# this module avoids the false positive without suppressing other errors. +disallow_subclassing_any = false + [tool.doc8] ignore-path = "doc/_build,doc/static/uml/styles/plantuml-c4" max-line-length = 120