dfetch-org · spoorcc · Oct 18, 2025 · Oct 18, 2025 · Oct 18, 2025 · Nov 4, 2025
diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml
@@ -43,6 +43,7 @@ jobs:
       - run: dfetch check
       - run: dfetch update
       - run: dfetch update
+      - run: dfetch filter
       - run: dfetch report -t sbom
       - name: Dfetch SARIF Check
         uses: ./
@@ -56,6 +57,7 @@ jobs:
         run: |
             dfetch update
             dfetch update
+            find . | dfetch filter
             dfetch report
 
   test:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -14,17 +14,17 @@ repos:
     hooks:
     -   id: isort
         name: Sort import
-        entry: isort
+        entry: dfetch
+        args: ['filter','--not-dfetched', 'isort']
         language: system
         types: [file, python]
-        exclude: ^doc/_ext/sphinxcontrib_asciinema
 
     -   id: black
         name: Black (auto-format)
-        entry: black
+        entry: dfetch
+        args: ['filter', '--not-dfetched', 'black']
         language: system
         types: [file, python]
-        exclude: ^doc/_ext/sphinxcontrib_asciinema
 
     -   id: pylint
         name: pylint
@@ -101,9 +101,10 @@ repos:
     -   id: codespell
         name: codespell
         description: Checks for common misspellings in text files.
-        entry: codespell
+        entry: dfetch
+        args: ['filter', '--not-dfetched','codespell']
         language: python
-        exclude: ^doc/_ext/sphinxcontrib_asciinema/_static/asciinema-player_3.12.1.js
+        # exclude: ^doc/_ext/sphinxcontrib_asciinema/_static/asciinema-player_3.12.1.js
         types: [text]
     -   id: ruff
         name: ruff

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -15,6 +15,7 @@ Release 0.11.0 (unreleased)
 * Handle SVN tags with special characters (#811)
 * Don't return non-zero exit code if tool not found during environment (#701)
 * Create standalone binaries for Linux, Mac & Windows (#705)
+* Add filter command (#19)
 
 Release 0.10.0 (released 2025-03-12)
 ====================================

diff --git a/dfetch/__main__.py b/dfetch/__main__.py
@@ -10,6 +10,7 @@
 import dfetch.commands.check
 import dfetch.commands.diff
 import dfetch.commands.environment
+import dfetch.commands.filter
 import dfetch.commands.freeze
 import dfetch.commands.import_
 import dfetch.commands.init
@@ -29,7 +30,9 @@ class DfetchFatalException(Exception):
 def create_parser() -> argparse.ArgumentParser:
     """Create the main argument parser."""
     parser = argparse.ArgumentParser(
-        formatter_class=argparse.RawTextHelpFormatter, epilog=__doc__
+        formatter_class=argparse.RawTextHelpFormatter,
+        epilog=__doc__,
+        exit_on_error=False,
     )
     parser.add_argument(
         "--verbose", "-v", action="store_true", help="Increase verbosity"
@@ -40,6 +43,7 @@ def create_parser() -> argparse.ArgumentParser:
     dfetch.commands.check.Check.create_menu(subparsers)
     dfetch.commands.diff.Diff.create_menu(subparsers)
     dfetch.commands.environment.Environment.create_menu(subparsers)
+    dfetch.commands.filter.Filter.create_menu(subparsers)
     dfetch.commands.freeze.Freeze.create_menu(subparsers)
     dfetch.commands.import_.Import.create_menu(subparsers)
     dfetch.commands.init.Init.create_menu(subparsers)
@@ -57,8 +61,15 @@ def _help(args: argparse.Namespace) -> None:
 
 def run(argv: Sequence[str]) -> None:
     """Start dfetch."""
-    logger.print_title()
-    args = create_parser().parse_args(argv)
+    parser = create_parser()
+    try:
+        args = parser.parse_args(argv)
+    except argparse.ArgumentError as exc:
+        logger.print_title()
+        parser.error(exc.message)
+
+    if args.verbose or not args.func.silent():
+        logger.print_title()
 
     if args.verbose:
         dfetch.log.increase_verbosity()

diff --git a/dfetch/commands/command.py b/dfetch/commands/command.py
@@ -31,6 +31,17 @@ class Command(ABC):
 
     CHILD_TYPE = TypeVar("CHILD_TYPE", bound="Command")  # noqa
 
+    @staticmethod
+    def silent() -> bool:
+        """Whether the command is silent.
+
+        If a command is silent the title will not be printed when the command is run.
+        This base implementation reports ``False``; the ``Filter`` command overrides
+        it to return ``True``. ``run`` consults it through ``args.func.silent()``
+        and still prints the title when ``--verbose`` is given.
+
+        Returns:
+            bool: True if the command is silent, False otherwise.
+        """
+        return False
+
     @staticmethod
     @abstractmethod
     def create_menu(subparsers: SubparserActionType) -> None:

diff --git a/dfetch/commands/filter.py b/dfetch/commands/filter.py
@@ -0,0 +1,210 @@
+"""*Dfetch* can filter files in the repo.
+
+When no input is given, all files in the repo are considered. Alternatively, a list of files can be piped in
+(for example from ``find``), or it can be used as a wrapper around another tool to block or allow files that are under control of dfetch.
+
+.. scenario-include:: ../features/filter-projects.feature
+
+"""
+
+import argparse
+import os
+import sys
+from enum import Enum
+from pathlib import Path
+from typing import Optional
+
+import dfetch.commands.command
+import dfetch.log
+import dfetch.manifest.manifest
+from dfetch.log import get_logger
+from dfetch.util.cmdline import run_on_cmdline_uncaptured
+from dfetch.util.util import in_directory
+
+logger = get_logger(__name__)
+
+
+class FilterType(Enum):
+    """Types of filtering."""
+
+    # Block only paths that are not located below the top-level directory.
+    BLOCK_ONLY_PATH_TRAVERSAL = 0
+    # Additionally block paths that are located inside one of the project paths.
+    BLOCK_IF_INSIDE = 1
+    # Additionally block paths that are not located inside any project path.
+    BLOCK_IF_OUTSIDE = 2
+
+
+class Filter(dfetch.commands.command.Command):
+    """Filter files based on flags and pass on any command.
+
+    Based on the provided arguments filter files, and call the given arguments or print them out if no command given.
+    """
+
+    @staticmethod
+    def silent() -> bool:
+        """Report this command as silent, so the title is not printed when it is run."""
+        return True
+
+    @staticmethod
+    def create_menu(subparsers: dfetch.commands.command.SubparserActionType) -> None:
+        """Add the parser menu for this action."""
+        parser = dfetch.commands.command.Command.parser(subparsers, Filter)
+        parser.add_argument(
+            "--dfetched",
+            "-D",
+            action="store_true",
+            # Must default to False: with a True default the flag could never be
+            # observed and combining it with --not-dfetched had no effect.
+            default=False,
+            help="Keep files that came here by dfetching them.",
+        )
+
+        parser.add_argument(
+            "--not-dfetched",
+            "-N",
+            action="store_true",
+            default=False,
+            help="Keep files that did not come here by dfetching them.",
+        )
+
+        parser.add_argument(
+            "cmd",
+            metavar="<cmd>",
+            type=str,
+            nargs="?",
+            help="Command to call",
+        )
+
+        parser.add_argument(
+            "args",
+            metavar="<args>",
+            type=str,
+            nargs="*",
+            help="Arguments to pass to the command",
+        )
+
+    def __call__(self, args: argparse.Namespace) -> None:
+        """Perform the filter.
+
+        Collects the arguments (command line and piped input), drops every
+        argument that resolves to a blocked path and then either runs ``args.cmd``
+        with the remaining arguments or prints them one per line.
+        """
+        if not args.verbose:
+            # Keep the output limited to the filtered arguments / the wrapped tool.
+            dfetch.log.set_level("ERROR")
+
+        argument_list = self._get_arguments(args)
+
+        manifest = dfetch.manifest.manifest.get_manifest()
+        topdir = Path(manifest.path).parent
+
+        resolved_args = self._resolve_args(argument_list, topdir)
+
+        # Project destinations are resolved from within the manifest directory.
+        with in_directory(topdir):
+            abs_project_paths = {
+                Path(project.destination).resolve() for project in manifest.projects
+            }
+
+        filtered_args = self._filter_args(
+            topdir, resolved_args, abs_project_paths, self._select_block_type(args)
+        )
+
+        if args.cmd:
+            run_on_cmdline_uncaptured(logger, [args.cmd] + filtered_args)
+        else:
+            # Join with "\n": stdout is a text stream that already translates
+            # newlines, so os.linesep would end up as "\r\r\n" on Windows.
+            print("\n".join(filtered_args))
+
+    @staticmethod
+    def _select_block_type(args: argparse.Namespace) -> FilterType:
+        """Translate the ``--dfetched`` / ``--not-dfetched`` flags into a filter type.
+
+        Without flags, or with only ``--dfetched``, only dfetched files are kept.
+        With only ``--not-dfetched`` only the other files are kept. With both
+        flags both kinds are kept and only paths outside the project are blocked.
+        """
+        if args.dfetched and args.not_dfetched:
+            return FilterType.BLOCK_ONLY_PATH_TRAVERSAL
+        if args.not_dfetched:
+            return FilterType.BLOCK_IF_INSIDE
+        return FilterType.BLOCK_IF_OUTSIDE
+
+    def _filter_args(
+        self,
+        topdir: Path,
+        resolved_args: dict[str, Optional[Path]],
+        abs_project_paths: set[Path],
+        block: FilterType,
+    ) -> list[str]:
+        """Return the arguments, in their original order, that are not blocked.
+
+        Arguments that did not resolve to an existing path (value ``None``) are
+        never blocked and are passed on untouched.
+        """
+        # A set gives constant-time membership tests in the comprehension below.
+        blocked = set(
+            self._filter_files(
+                topdir,
+                abs_project_paths,
+                {path for path in resolved_args.values() if path},
+                block,
+            )
+        )
+
+        return [
+            arg for arg, resolved in resolved_args.items() if resolved not in blocked
+        ]
+
+    def _resolve_args(
+        self, argument_list: list[str], topdir: Path
+    ) -> dict[str, Optional[Path]]:
+        """Map every argument to its resolved path, or ``None`` if it is no existing path.
+
+        When no arguments are given, every path below *topdir* is used instead,
+        skipping anything that has a ``.git`` component.
+        """
+        if not argument_list:
+            return {
+                str(file): file.resolve()
+                for file in topdir.rglob("*")
+                if ".git" not in file.parts
+            }
+
+        resolved_args: dict[str, Optional[Path]] = {}
+        for argument in argument_list:
+            path_obj = Path(argument.strip())
+            resolved_args[argument] = path_obj.resolve() if path_obj.exists() else None
+
+        return resolved_args
+
+    def _get_arguments(self, args: argparse.Namespace) -> list[str]:
+        """Collect the command line arguments followed by non-empty lines piped in on stdin."""
+        argument_list: list[str] = [str(arg) for arg in args.args]
+        # sys.stdin can be None when the process has no standard input attached.
+        if sys.stdin is not None and not sys.stdin.isatty():
+            argument_list.extend(
+                non_empty_line for line in sys.stdin if (non_empty_line := line.strip())
+            )
+
+        return argument_list
+
+    def _filter_files(
+        self,
+        topdir: Path,
+        paths: set[Path],
+        input_paths: set[Path],
+        block: FilterType = FilterType.BLOCK_IF_OUTSIDE,
+    ) -> list[Path]:
+        """Determine which of the input paths must be blocked.
+
+        Args:
+            topdir: Top-level directory; anything not below it is always blocked.
+            paths: Resolved project paths to test the input paths against.
+            input_paths: Resolved paths to classify.
+            block: Which of the paths below *topdir* should be blocked.
+
+        Returns:
+            The input paths that are blocked.
+        """
+        # The input paths are resolved, so compare against a resolved top directory;
+        # otherwise a relative or symlinked topdir marks every file as outside.
+        root = topdir.resolve()
+        blocklist: list[Path] = []
+
+        for abs_path in input_paths:
+            if not abs_path.is_relative_to(root):
+                logger.print_info_line(str(abs_path), "outside project")
+                blocklist.append(abs_path)
+                continue
+
+            if block == FilterType.BLOCK_ONLY_PATH_TRAVERSAL:
+                continue
+
+            containing_dir = self._is_file_contained_in_any_path(abs_path, paths)
+
+            if containing_dir:
+                logger.print_info_line(
+                    str(abs_path), f"inside project ({containing_dir})"
+                )
+                if block == FilterType.BLOCK_IF_INSIDE:
+                    blocklist.append(abs_path)
+            else:
+                logger.print_info_line(str(abs_path), "not inside any project")
+                if block == FilterType.BLOCK_IF_OUTSIDE:
+                    blocklist.append(abs_path)
+
+        return blocklist
+
+    def _is_file_contained_in_any_path(
+        self, file: Path, paths: set[Path]
+    ) -> Optional[Path]:
+        """Return one of the paths that contains the file, or ``None`` if there is none."""
+        return next((path for path in paths if file.is_relative_to(path)), None)
diff --git a/dfetch/log.py b/dfetch/log.py
@@ -57,6 +57,11 @@ def increase_verbosity() -> None:
     coloredlogs.increase_verbosity()
 
 
+def set_level(level: str) -> None:
+    """Set the level of the logger.
+
+    Thin wrapper that forwards the level to ``coloredlogs.set_level``.
+
+    Args:
+        level: Level to set, passed on unchanged (the filter command passes ``"ERROR"``).
+    """
+    coloredlogs.set_level(level)
+
+
 def get_logger(name: str) -> DLogger:
     """Get logger for a module."""
     logging.setLoggerClass(DLogger)

diff --git a/dfetch/util/cmdline.py b/dfetch/util/cmdline.py
@@ -69,6 +69,34 @@ def run_on_cmdline(
     return proc
 
 
+def run_on_cmdline_uncaptured(
+    logger: logging.Logger, cmd: Union[str, list[str]]
+) -> "subprocess.CompletedProcess[Any]":
+    """Run a command without capturing its output, and raise if something goes wrong.
+
+    The output of the command is not captured, so nothing of it is logged or
+    attached to the raised error.
+
+    Args:
+        logger: Logger used to report the command that is about to be run.
+        cmd: Command to run, either as an argument list or as a single string
+            that is split on spaces.
+
+    Returns:
+        The completed process; its return code is always 0.
+
+    Raises:
+        SubprocessCommandError: If the command exits with a non-zero return code.
+        RuntimeError: If the executable could not be found.
+    """
+    logger.debug(f"Running {cmd}")
+
+    if not isinstance(cmd, list):
+        cmd = cmd.split(" ")
+
+    try:
+        # check=True raises CalledProcessError on every non-zero exit code, so no
+        # separate return code check is needed afterwards.
+        return subprocess.run(cmd, capture_output=False, check=True)  # nosec
+    except subprocess.CalledProcessError as exc:
+        raise SubprocessCommandError(
+            exc.cmd,
+            "",
+            "",
+            exc.returncode,
+        ) from exc
+    except FileNotFoundError as exc:
+        raise RuntimeError(
+            f"{cmd[0]} not available on system, please install"
+        ) from exc
+
+
 def _log_output(proc: subprocess.CompletedProcess, logger: logging.Logger) -> None:  # type: ignore
     logger.debug(f"Return code: {proc.returncode}")
 

diff --git a/dfetch/util/util.py b/dfetch/util/util.py
@@ -63,14 +63,14 @@ def safe_rmtree(path: str) -> None:
 
 
 @contextmanager
-def in_directory(path: str) -> Generator[str, None, None]:
+def in_directory(path: Union[str, Path]) -> Generator[str, None, None]:
     """Work temporarily in a given directory."""
     pwd = os.getcwd()
     if not os.path.isdir(path):
         path = os.path.dirname(path)
     os.chdir(path)
     try:
-        yield path
+        yield str(path)
     finally:
         os.chdir(pwd)