Skip to content

Commit

Permalink
optimize buildkite python packages change detection (#22141)
Browse files Browse the repository at this point in the history
## Summary & Motivation

Determining Buildkite pipeline steps locally is very slow (22s). I believe this is most likely due to `__pycache__` directories or other similar files that are probably not an issue in CI. However, when iterating locally, it helps for this step to be as fast as possible, and these changes reduce the time to generate the pipeline locally from 22s to 2s.

## How I Tested These Changes

- visually inspected that the same packages are found
- modified a python file in a package and ensured that it was still picked up as a changed package
  • Loading branch information
neilfulwiler authored Jun 6, 2024
1 parent c96791d commit bdada50
Showing 1 changed file with 17 additions and 17 deletions.
34 changes: 17 additions & 17 deletions .buildkite/dagster-buildkite/dagster_buildkite/python_packages.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
# pyright: reportUnnecessaryTypeIgnoreComment=false

import logging
import os
import subprocess
from distutils import core as distutils_core
from importlib import reload
from pathlib import Path
from typing import Dict, Optional, Set

import pathspec
from pkg_resources import Requirement, parse_requirements

from dagster_buildkite.git import ChangedFiles, GitInfo

# File suffixes considered able to alter a package's behavior. A changed file
# only counts toward marking a package as changed when its suffix appears
# here, which excludes things like README edits.
changed_filetypes = [
    ".py",
    ".cfg",
    ".toml",
    ".yaml",
    ".ipynb",
    ".yml",
    ".ini",
    ".jinja",
]


def _path_is_relative_to(p: Path, u: Path) -> bool:
# see https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.is_relative_to
return u == p or u in p.parents


class PythonPackage:
def __init__(self, setup_py_path: Path):
self.directory = setup_py_path.parent
Expand Down Expand Up @@ -114,22 +120,16 @@ def load_from_git(cls, git_info: GitInfo) -> None:

logging.info("Finding Python packages:")

git_ignore = git_info.directory / ".gitignore"

if git_ignore.exists():
ignored = git_ignore.read_text().splitlines()
git_ignore_spec = pathspec.PathSpec.from_lines("gitwildmatch", ignored)
else:
git_ignore_spec = pathspec.PathSpec([])

# Consider any setup.py file to be a package
packages = set(
[
PythonPackage(Path(setup))
for setup in git_info.directory.rglob("setup.py")
if not git_ignore_spec.match_file(str(setup))
]
)
output = subprocess.check_output(
["git", "ls-files", "."],
cwd=str(git_info.directory),
).decode("utf-8")
packages = [
PythonPackage(git_info.directory / Path(file))
for file in output.split("\n")
if os.path.basename(file) == "setup.py"
]

for package in sorted(packages):
logging.info(" - " + package.name)
Expand All @@ -142,7 +142,7 @@ def load_from_git(cls, git_info: GitInfo) -> None:
for change in ChangedFiles.all:
if (
# Our change is in this package's directory
(change in package.directory.rglob("*"))
_path_is_relative_to(change, package.directory)
# The file can alter behavior - exclude things like README changes
and (change.suffix in changed_filetypes)
# The file is not part of a test suite. We treat this differently
Expand Down

0 comments on commit bdada50

Please sign in to comment.