Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ jobs:
- run: dfetch check
- run: dfetch update
- run: dfetch update
- run: dfetch filter
- run: dfetch report -t sbom
- name: Dfetch SARIF Check
uses: ./
Expand All @@ -56,6 +57,7 @@ jobs:
run: |
dfetch update
dfetch update
find . | dfetch filter
dfetch report

test:
Expand Down
13 changes: 7 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@ repos:
hooks:
- id: isort
name: Sort import
entry: isort
entry: dfetch
args: ['filter','--not-dfetched', 'isort']
language: system
types: [file, python]
exclude: ^doc/_ext/sphinxcontrib_asciinema

- id: black
name: Black (auto-format)
entry: black
entry: dfetch
args: ['filter', '--not-dfetched', 'black']
language: system
types: [file, python]
exclude: ^doc/_ext/sphinxcontrib_asciinema

- id: pylint
name: pylint
Expand Down Expand Up @@ -101,9 +101,10 @@ repos:
- id: codespell
name: codespell
description: Checks for common misspellings in text files.
entry: codespell
entry: dfetch
args: ['filter', '--not-dfetched','codespell']
language: python
exclude: ^doc/_ext/sphinxcontrib_asciinema/_static/asciinema-player_3.12.1.js
# exclude: ^doc/_ext/sphinxcontrib_asciinema/_static/asciinema-player_3.12.1.js
types: [text]
- id: ruff
name: ruff
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Release 0.11.0 (unreleased)
* Handle SVN tags with special characters (#811)
* Don't return non-zero exit code if tool not found during environment (#701)
* Create standalone binaries for Linux, Mac & Windows (#705)
* Add filter command (#19)

Release 0.10.0 (released 2025-03-12)
====================================
Expand Down
17 changes: 14 additions & 3 deletions dfetch/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import dfetch.commands.check
import dfetch.commands.diff
import dfetch.commands.environment
import dfetch.commands.filter
import dfetch.commands.freeze
import dfetch.commands.import_
import dfetch.commands.init
Expand All @@ -29,7 +30,9 @@ class DfetchFatalException(Exception):
def create_parser() -> argparse.ArgumentParser:
"""Create the main argument parser."""
parser = argparse.ArgumentParser(
formatter_class=argparse.RawTextHelpFormatter, epilog=__doc__
formatter_class=argparse.RawTextHelpFormatter,
epilog=__doc__,
exit_on_error=False,
)
parser.add_argument(
"--verbose", "-v", action="store_true", help="Increase verbosity"
Expand All @@ -40,6 +43,7 @@ def create_parser() -> argparse.ArgumentParser:
dfetch.commands.check.Check.create_menu(subparsers)
dfetch.commands.diff.Diff.create_menu(subparsers)
dfetch.commands.environment.Environment.create_menu(subparsers)
dfetch.commands.filter.Filter.create_menu(subparsers)
dfetch.commands.freeze.Freeze.create_menu(subparsers)
dfetch.commands.import_.Import.create_menu(subparsers)
dfetch.commands.init.Init.create_menu(subparsers)
Expand All @@ -57,8 +61,15 @@ def _help(args: argparse.Namespace) -> None:

def run(argv: Sequence[str]) -> None:
"""Start dfetch."""
logger.print_title()
args = create_parser().parse_args(argv)
parser = create_parser()
try:
args = parser.parse_args(argv)
except argparse.ArgumentError as exc:
logger.print_title()
parser.error(exc.message)

if args.verbose or not args.func.silent():
logger.print_title()

if args.verbose:
dfetch.log.increase_verbosity()
Expand Down
11 changes: 11 additions & 0 deletions dfetch/commands/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@ class Command(ABC):

CHILD_TYPE = TypeVar("CHILD_TYPE", bound="Command") # noqa

@staticmethod
def silent() -> bool:
    """Tell whether this command suppresses the title banner.

    A silent command does not get the title printed when it is run. The base
    implementation is not silent; subclasses may override it.

    Returns:
        bool: True when the command is silent, False otherwise.
    """
    return False

@staticmethod
@abstractmethod
def create_menu(subparsers: SubparserActionType) -> None:
Expand Down
210 changes: 210 additions & 0 deletions dfetch/commands/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
"""*Dfetch* can filter files in the repo.

Without any input it considers every file below the manifest directory. A list of files can also be piped in
(for example from ``find``), or it can wrap another tool so that files under control of dfetch are blocked or allowed.

.. scenario-include:: ../features/filter-projects.feature

"""

import argparse
import os
import sys
from enum import Enum
from pathlib import Path
from typing import Optional

import dfetch.commands.command
import dfetch.log
import dfetch.manifest.manifest
from dfetch.log import get_logger
from dfetch.util.cmdline import run_on_cmdline_uncaptured
from dfetch.util.util import in_directory

logger = get_logger(__name__)


class FilterType(Enum):
    """Blocking strategies that can be applied to the candidate files."""

    # Block only paths that lie outside the top-level directory.
    BLOCK_ONLY_PATH_TRAVERSAL = 0
    # Additionally block files that are inside one of the project paths.
    BLOCK_IF_INSIDE = 1
    # Additionally block files that are not inside any project path.
    BLOCK_IF_OUTSIDE = 2


class Filter(dfetch.commands.command.Command):
"""Filter files based on flags and pass on any command.

Based on the provided arguments filter files, and call the given arguments or print them out if no command given.
"""

@staticmethod
def silent() -> bool:
    """Report this command as silent, so no title is printed when it runs."""
    return True

@staticmethod
def create_menu(subparsers: dfetch.commands.command.SubparserActionType) -> None:
    """Add the parser menu for this action.

    Registers the ``--dfetched`` / ``--not-dfetched`` selection flags, an
    optional command to call and the arguments to pass to that command.

    Args:
        subparsers: Sub-parser collection the command is registered on.
    """
    parser = dfetch.commands.command.Command.parser(subparsers, Filter)

    # NOTE(review): ``store_true`` together with ``default=True`` makes this
    # value True whether or not the flag is given, so only ``--not-dfetched``
    # changes the selection - confirm that this is intended.
    parser.add_argument(
        "--dfetched",
        "-D",
        action="store_true",
        default=True,
        help="Keep files that came here by dfetching them.",
    )

    parser.add_argument(
        "--not-dfetched",
        "-N",
        action="store_true",
        default=False,
        help="Keep files that did not come here by dfetching them.",
    )

    parser.add_argument(
        "cmd",
        metavar="<cmd>",
        type=str,
        nargs="?",
        help="Command to call",
    )

    parser.add_argument(
        "args",
        metavar="<args>",
        type=str,
        nargs="*",
        help="Arguments to pass to the command",
    )

def __call__(self, args: argparse.Namespace) -> None:
    """Perform the filter.

    Candidates come from the positional arguments and from piped stdin; when
    neither yields anything, every file below the manifest directory is
    considered. The candidates that survive filtering are passed to the
    given command, or printed one per line when no command was given.

    Args:
        args: Parsed command line; ``verbose``, ``dfetched``,
            ``not_dfetched``, ``cmd`` and ``args`` are read.
    """
    if not args.verbose:
        # Unless verbose, only errors are logged.
        dfetch.log.set_level("ERROR")

    argument_list = self._get_arguments(args)

    manifest = dfetch.manifest.manifest.get_manifest()
    topdir = Path(manifest.path).parent

    resolved_args = self._resolve_args(argument_list, topdir)

    # Resolve the project destinations with the manifest directory as the
    # working directory.
    with in_directory(topdir):
        abs_project_paths = {
            Path(project.destination).resolve() for project in manifest.projects
        }

    filtered_args = self._filter_args(
        topdir, resolved_args, abs_project_paths, self._select_filter_type(args)
    )

    if args.cmd:
        run_on_cmdline_uncaptured(logger, [args.cmd] + filtered_args)
    else:
        # print() writes in text mode, which already translates "\n" into the
        # platform line ending; joining with os.linesep would emit "\r\r\n"
        # on Windows.
        print("\n".join(filtered_args))

@staticmethod
def _select_filter_type(args: argparse.Namespace) -> FilterType:
    """Map the ``--dfetched`` / ``--not-dfetched`` flags to a filter type."""
    if args.not_dfetched:
        return FilterType.BLOCK_IF_INSIDE
    if args.dfetched:
        return FilterType.BLOCK_IF_OUTSIDE
    return FilterType.BLOCK_ONLY_PATH_TRAVERSAL

def _filter_args(
    self,
    topdir: Path,
    resolved_args: dict[str, Optional[Path]],
    abs_project_paths: set[Path],
    block: FilterType,
) -> list[str]:
    """Return the arguments whose resolved path is not blocked.

    Arguments that did not resolve to a path (value ``None``) are never
    blocked and are therefore passed through. The order of ``resolved_args``
    is preserved.

    Args:
        topdir: Top-level directory handed on to the file filter.
        resolved_args: Mapping of each original argument to its resolved
            path, or ``None`` when it did not resolve.
        abs_project_paths: Resolved project paths.
        block: Blocking strategy to apply.

    Returns:
        The original argument strings that passed the filter.
    """
    # A set gives constant-time membership tests for the comprehension below.
    blocked = set(
        self._filter_files(
            topdir,
            abs_project_paths,
            {path for path in resolved_args.values() if path},
            block,
        )
    )

    return [arg for arg, path in resolved_args.items() if path not in blocked]

def _resolve_args(
self, argument_list: list[str], topdir: Path
) -> dict[str, Optional[Path]]:
resolved_args: dict[str, Optional[Path]] = {}
if argument_list:
for argument in argument_list:
path_obj = Path(argument.strip())
resolved_args[argument] = (
path_obj.resolve() if path_obj.exists() else None
)
else:
if not argument_list:
resolved_args = {
str(file): file.resolve()
for file in topdir.rglob("*")
if ".git" not in file.parts
}

return resolved_args

def _get_arguments(self, args: argparse.Namespace) -> list[str]:
argument_list: list[str] = list(str(arg) for arg in args.args)
if not sys.stdin.isatty():
argument_list.extend(
non_empty_line for line in sys.stdin if (non_empty_line := line.strip())
)

return argument_list

def _filter_files(
    self,
    topdir: Path,
    paths: set[Path],
    input_paths: set[Path],
    block: FilterType = FilterType.BLOCK_IF_OUTSIDE,
) -> list[Path]:
    """Determine which of the input paths have to be blocked.

    A path that is not below ``topdir`` is always blocked. For the other
    paths the outcome depends on ``block`` and on whether the path lies
    inside one of ``paths``.

    Args:
        topdir: Directory every accepted path must be below.
        paths: Directories to test containment against.
        input_paths: Candidate paths to classify.
        block: Blocking strategy to apply.

    Returns:
        The blocked candidates.
    """
    blocked: list[Path] = []

    for candidate in input_paths:
        try:
            candidate.relative_to(topdir)
        except ValueError:
            # Not below topdir: blocked regardless of the strategy.
            logger.print_info_line(str(candidate), "outside project")
            blocked.append(candidate)
            continue

        if block == FilterType.BLOCK_ONLY_PATH_TRAVERSAL:
            continue

        owner = self._is_file_contained_in_any_path(candidate, paths)

        if not owner:
            logger.print_info_line(str(candidate), "not inside any project")
            if block == FilterType.BLOCK_IF_OUTSIDE:
                blocked.append(candidate)
        else:
            logger.print_info_line(str(candidate), f"inside project ({owner})")
            if block == FilterType.BLOCK_IF_INSIDE:
                blocked.append(candidate)

    return blocked

def _is_file_contained_in_any_path(
self, file: Path, paths: set[Path]
) -> Optional[Path]:
"""Check if a specific file is somewhere in one of the paths."""
for path in paths:
try:
file.relative_to(path)
return path
except ValueError:
continue
return None
5 changes: 5 additions & 0 deletions dfetch/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ def increase_verbosity() -> None:
coloredlogs.increase_verbosity()


def set_level(level: str) -> None:
    """Set the logging level by forwarding *level* to ``coloredlogs``."""
    coloredlogs.set_level(level)


def get_logger(name: str) -> DLogger:
"""Get logger for a module."""
logging.setLoggerClass(DLogger)
Expand Down
28 changes: 28 additions & 0 deletions dfetch/util/cmdline.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,34 @@ def run_on_cmdline(
return proc


def run_on_cmdline_uncaptured(
    logger: logging.Logger, cmd: Union[str, list[str]]
) -> "subprocess.CompletedProcess[Any]":
    """Run a command without capturing its output, and raise if it fails.

    Args:
        logger: Logger that receives a debug line naming the command.
        cmd: The command as an argument list, or as a single string that is
            split with shell-like rules (quoting and repeated whitespace are
            handled).

    Returns:
        The completed process.

    Raises:
        SubprocessCommandError: The command exited with a non-zero code.
        RuntimeError: The executable was not found on the system.
    """
    import shlex  # local import: only needed to split string commands

    logger.debug("Running %s", cmd)

    if not isinstance(cmd, list):
        cmd = shlex.split(cmd)

    try:
        # check=True turns every non-zero exit code into CalledProcessError,
        # so no separate return code check is needed afterwards.
        return subprocess.run(cmd, capture_output=False, check=True)  # nosec
    except subprocess.CalledProcessError as exc:
        raise SubprocessCommandError(
            exc.cmd,
            "",
            "",
            exc.returncode,
        ) from exc
    except FileNotFoundError as exc:
        executable = cmd[0]
        raise RuntimeError(
            f"{executable} not available on system, please install"
        ) from exc


def _log_output(proc: subprocess.CompletedProcess, logger: logging.Logger) -> None: # type: ignore
logger.debug(f"Return code: {proc.returncode}")

Expand Down
4 changes: 2 additions & 2 deletions dfetch/util/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,14 @@ def safe_rmtree(path: str) -> None:


@contextmanager
def in_directory(path: Union[str, Path]) -> Generator[str, None, None]:
    """Work temporarily in a given directory.

    Args:
        path: Directory to change into. When it is not an existing directory
            its parent directory is used instead.

    Yields:
        The directory that was changed into, as a string.
    """
    pwd = os.getcwd()
    if not os.path.isdir(path):
        # A bare file name has an empty dirname, which os.chdir() rejects;
        # it refers to the current directory.
        path = os.path.dirname(path) or "."
    os.chdir(path)
    try:
        yield str(path)
    finally:
        # Always return to the directory we started in.
        os.chdir(pwd)

Expand Down
Loading
Loading