Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up builds with large number of git ignored files #499

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 51 additions & 15 deletions src/poetry/core/masonry/builders/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Container


if TYPE_CHECKING:
Expand Down Expand Up @@ -41,7 +42,7 @@ def __init__(
self._poetry = poetry
self._package = poetry.package
self._path: Path = poetry.file.parent
self._excluded_files: set[str] | None = None
self._excluded_files: _IgnoredFilesChecker | None = None
self._executable = Path(executable or sys.executable)

packages = []
Expand Down Expand Up @@ -100,7 +101,7 @@ def default_target_dir(self) -> Path:
def build(self, target_dir: Path | None) -> Path:
raise NotImplementedError()

def find_excluded_files(self, fmt: str | None = None) -> set[str]:
def find_excluded_files(self, fmt: str | None = None) -> Container[str]:
if self._excluded_files is None:
from poetry.core.vcs import get_vcs

Expand Down Expand Up @@ -129,27 +130,25 @@ def find_excluded_files(self, fmt: str | None = None) -> set[str]:
Path(included).relative_to(self._path).as_posix()
)

# Note: `vcs_ignored_files` might return whole directories to
# ignore, so removing the list of `explicitly_included` isn't enough
# by itself.
ignored = (vcs_ignored_files | explicitly_excluded) - explicitly_included
for ignored_file in ignored:
logger.debug(f"Ignoring: {ignored_file}")

self._excluded_files = ignored
for included_file in explicitly_included:
logger.debug(f"Including: {included_file}")

self._excluded_files = _IgnoredFilesChecker(
included_files=explicitly_included,
excluded_files=ignored,
)

return self._excluded_files

def is_excluded(self, filepath: str | Path) -> bool:
exclude_path = Path(filepath)

while True:
if exclude_path.as_posix() in self.find_excluded_files(fmt=self.format):
return True

if len(exclude_path.parts) > 1:
exclude_path = exclude_path.parent
else:
break

return False
return filepath in self.find_excluded_files(fmt=self.format)

def find_files_to_add(self, exclude_build: bool = True) -> set[BuildIncludeFile]:
"""
Expand Down Expand Up @@ -396,3 +395,40 @@ def relative_to_source_root(self) -> Path:
return self.path.relative_to(self.source_root)

return self.path


class _IgnoredFilesChecker:
"""A container of files that should be ignored.

Args:
included_files: A set of files or directories that should not be
ignored, even if they're also in the set of excluded files.
excluded_files: A set of files or directories that should be ignored.
"""

def __init__(self, included_files: set[str], excluded_files: set[str]) -> None:
self._excluded_files = excluded_files
self._included_files = included_files

def __contains__(self, filepath: object) -> bool:
if not isinstance(filepath, (str, Path)):
raise TypeError("value must be a string or path-like object")

# We handle the fact that the included and excluded files may be
# directories by first checking the actual file and then recursively
# checking each parent directory.
exclude_path = Path(filepath)

while True:
if exclude_path.as_posix() in self._included_files:
return False

if exclude_path.as_posix() in self._excluded_files:
return True

if len(exclude_path.parts) > 1:
exclude_path = exclude_path.parent
else:
break

return False
4 changes: 3 additions & 1 deletion src/poetry/core/vcs/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,8 @@ def get_current_branch(self, folder: Path | None = None) -> str:
return output.strip()

def get_ignored_files(self, folder: Path | None = None) -> list[str]:
"""Returns the list of files and directories that are ignored by Git."""

args = []
if folder is None and self._work_dir:
folder = self._work_dir
Expand All @@ -336,7 +338,7 @@ def get_ignored_files(self, folder: Path | None = None) -> list[str]:
folder.as_posix(),
]

args += ["ls-files", "--others", "-i", "--exclude-standard"]
args += ["ls-files", "--others", "-i", "--exclude-standard", "--directory"]
output = self.run(*args)

return output.strip().split("\n")
Expand Down
34 changes: 31 additions & 3 deletions tests/masonry/builders/test_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@
from pytest_mock import MockerFixture


def get_excluded_files(builder: Builder) -> set[str]:
"""Get all excluded files in the project directory"""

relative_paths = [
path.relative_to(builder._path).as_posix()
for path in builder._path.rglob("*")
if path.is_file()
]

return {path for path in relative_paths if path in builder.find_excluded_files()}


def test_builder_find_excluded_files(mocker: MockerFixture) -> None:
p = mocker.patch("poetry.core.vcs.git.Git.get_ignored_files")
p.return_value = []
Expand All @@ -24,7 +36,23 @@ def test_builder_find_excluded_files(mocker: MockerFixture) -> None:
Factory().create_poetry(Path(__file__).parent / "fixtures" / "complete")
)

assert builder.find_excluded_files() == {"my_package/sub_pkg1/extra_file.xml"}
assert get_excluded_files(builder) == {"my_package/sub_pkg1/extra_file.xml"}


def test_builder_find_excluded_files_whole_directory(mocker: MockerFixture) -> None:
p = mocker.patch("poetry.core.vcs.git.Git.get_ignored_files")
p.return_value = [
"my_package/sub_pkg1",
]

builder = Builder(
Factory().create_poetry(Path(__file__).parent / "fixtures" / "complete")
)

assert get_excluded_files(builder) == {
"my_package/sub_pkg1/extra_file.xml",
"my_package/sub_pkg1/__init__.py",
}


@pytest.mark.xfail(
Expand All @@ -41,7 +69,7 @@ def test_builder_find_case_sensitive_excluded_files(mocker: MockerFixture) -> No
)
)

assert builder.find_excluded_files() == {
assert get_excluded_files(builder) == {
"my_package/FooBar/Bar.py",
"my_package/FooBar/lowercasebar.py",
"my_package/Foo/SecondBar.py",
Expand All @@ -68,7 +96,7 @@ def test_builder_find_invalid_case_sensitive_excluded_files(
)
)

assert {"my_package/Bar/foo/bar/Foo.py"} == builder.find_excluded_files()
assert {"my_package/Bar/foo/bar/Foo.py"} == get_excluded_files(builder)


def test_get_metadata_content() -> None:
Expand Down