Skip to content

Commit

Permalink
Add support for Jupyter Notebooks (#11)
Browse files Browse the repository at this point in the history
* added custom parser for extracting imports from notebooks
* added ignore-notebooks option to CLI
* updated documentation
* Added various unit tests
  • Loading branch information
Florian Maas authored Sep 3, 2022
1 parent cce90d4 commit 572f670
Show file tree
Hide file tree
Showing 22 changed files with 426 additions and 390 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

---

__deptry__ is a command line tool to check for unused dependencies in a poetry managed python project. It does so by scanning the imported modules within all `.py` files in
__deptry__ is a command line tool to check for unused dependencies in a poetry managed Python project. It does so by scanning the imported modules within all Python files in
a directory and its subdirectories, and comparing those to the dependencies listed in `pyproject.toml`.

---
Expand Down
3 changes: 3 additions & 0 deletions deptry/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import logging

# Raise the threshold of the "nbconvert" logger to WARNING so that its DEBUG/INFO
# records are not emitted.
logging.getLogger("nbconvert").setLevel(logging.WARNING)
15 changes: 13 additions & 2 deletions deptry/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,13 @@ def deptry():
help="""Directories in which .py files should not be scanned for imports to determine if a dependency is used or not.
Defaults to 'venv'. Specify multiple directories by using this flag twice, e.g. `-id .venv -id other_dir`""",
)
def check(verbose, ignore_dependencies, ignore_directories):
@click.option(
"--ignore-notebooks",
"-nb",
is_flag=True,
help="Boolean flag to specify if notebooks should be ignored while scanning for imports.",
)
def check(verbose, ignore_dependencies, ignore_directories, ignore_notebooks):

log_level = logging.DEBUG if verbose else logging.INFO
logging.basicConfig(level=log_level, handlers=[logging.StreamHandler()], format="%(message)s")
Expand All @@ -42,15 +48,20 @@ def check(verbose, ignore_dependencies, ignore_directories):
cli_arguments["ignore_dependencies"] = list(ignore_dependencies)
if len(ignore_directories) > 0:
cli_arguments["ignore_directories"] = list(ignore_directories)
if ignore_notebooks:
cli_arguments["ignore_notebooks"] = True
config = Config(cli_arguments)

obsolete_dependencies = Core(
ignore_dependencies=config.config["ignore_dependencies"], ignore_directories=config.config["ignore_directories"]
ignore_dependencies=config.config["ignore_dependencies"],
ignore_directories=config.config["ignore_directories"],
ignore_notebooks=config.config["ignore_notebooks"],
).run()
if len(obsolete_dependencies):
logging.info(f"pyproject.toml contains obsolete dependencies: {obsolete_dependencies}")
sys.exit(1)
else:
logging.info("Succes! No obsolete dependencies found.")
sys.exit(0)


Expand Down
2 changes: 1 addition & 1 deletion deptry/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import toml

DEFAULTS = {"ignore_dependencies": None, "ignore_directories": [".venv"]}
DEFAULTS = {"ignore_dependencies": None, "ignore_directories": [".venv"], "ignore_notebooks": False}


class Config:
Expand Down
10 changes: 7 additions & 3 deletions deptry/core.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pathlib import Path
from typing import List

from deptry.import_parser import ImportParser
Expand All @@ -7,13 +8,16 @@


class Core:
def __init__(self, ignore_dependencies: List[str] = None, ignore_directories: List[str] = None):
def __init__(self, ignore_dependencies: List[str], ignore_directories: List[str], ignore_notebooks: bool):
self.ignore_dependencies = ignore_dependencies
self.ignore_directories = ignore_directories
self.ignore_notebooks = ignore_notebooks

def run(self):
all_py_files = PythonFileFinder(ignore_directories=self.ignore_directories).get_list_of_python_files()
imported_modules = ImportParser().get_imported_modules_for_list_of_files(all_py_files)
all_python_files = PythonFileFinder(
ignore_directories=self.ignore_directories, ignore_notebooks=self.ignore_notebooks
).get_all_python_files_in(Path("."))
imported_modules = ImportParser().get_imported_modules_for_list_of_files(all_python_files)
imported_packages = ImportsToPackageNames().convert(imported_modules)
obsolete_dependencies = ObsoleteDependenciesFinder(
imported_packages=imported_packages, ignore_dependencies=self.ignore_dependencies
Expand Down
72 changes: 46 additions & 26 deletions deptry/import_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,60 @@
from pathlib import Path
from typing import List

from deptry.notebook_import_extractor import NotebookImportExtractor


class ImportParser:
"""
Get a list of imported modules from a python file.
Scan a Python file for import statements and return a list of imported modules.
"""

def __init__(self) -> None:
pass

def get_imported_modules_for_file(self, path_to_py_file: Path) -> List[str]:
def get_imported_modules_for_list_of_files(self, list_of_files: List[Path]) -> List[str]:
modules_per_file = [self._get_imported_modules_from_file(file) for file in list_of_files]
all_modules = self._flatten_list(modules_per_file)
unique_modules = sorted(list(set(all_modules)))
logging.debug(f"All imported modules: {unique_modules}\n")
return unique_modules

def _get_imported_modules_from_file(self, path_to_file: Path) -> List[str]:
try:
modules = []
with open(path_to_py_file) as f:
root = ast.parse(f.read(), path_to_py_file)

for node in ast.iter_child_nodes(root):
if isinstance(node, ast.Import):
modules += [x.name.split(".")[0] for x in node.names]
elif isinstance(node, ast.ImportFrom):
modules.append(node.module.split(".")[0])
logging.debug(f"Found the following imports in {str(path_to_py_file)}: {modules}")
return modules
except: # noqa
logging.warning(f"Warning: Parsing imports for file {str(path_to_py_file)} failed.")

def get_imported_modules_for_list_of_files(self, list_of_paths: List[Path]) -> List[str]:
modules_per_file = [
{"path": str(path), "modules": self.get_imported_modules_for_file(path)} for path in list_of_paths
]
if str(path_to_file).endswith(".ipynb"):
modules = self._get_imported_modules_from_ipynb(path_to_file)
else:
modules = self._get_imported_modules_from_py(path_to_file)
logging.debug(f"Found the following imports in {str(path_to_file)}: {modules}")
except Exception as e:
logging.warning(f"Warning: Parsing imports for file {str(path_to_file)} failed.")
raise (e)
return modules

def _get_imported_modules_from_py(self, path_to_py_file: Path):
with open(path_to_py_file) as f:
root = ast.parse(f.read(), path_to_py_file)
return self._get_modules_from_ast_root(root)

def _get_imported_modules_from_ipynb(self, path_to_ipynb_file: Path):
    """
    Return the top-level module names imported in a Jupyter notebook.

    The import statements found by NotebookImportExtractor are joined into one piece of source
    code, which is then parsed in the same way as a regular Python file.

    Args:
        path_to_ipynb_file: Path to the .ipynb file to scan.
    """
    extractor = NotebookImportExtractor()
    import_source = "\n".join(extractor.extract(path_to_ipynb_file))
    return self._get_modules_from_ast_root(ast.parse(import_source))

@staticmethod
def _get_modules_from_ast_root(root):
modules = []
for file in modules_per_file:
if file["modules"]:
modules += file["modules"]
for node in ast.iter_child_nodes(root):
if isinstance(node, ast.Import):
modules += [x.name.split(".")[0] for x in node.names]
elif isinstance(node, ast.ImportFrom):
modules.append(node.module.split(".")[0])
return modules

unique_modules = sorted(list(set(modules)))
logging.debug(f"All imported modules: {unique_modules}\n")
return unique_modules
@staticmethod
def _flatten_list(modules_per_file):
all_modules = []
for modules in modules_per_file:
if modules:
all_modules += modules
return all_modules
2 changes: 1 addition & 1 deletion deptry/imports_to_package_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class ImportsToPackageNames:
For a list of imported modules, find for each module (e.g. python_dateutil) the corresponding package
name used to install it (e.g. python-dateutil) and return those names as a list.
There are two reasons that can cause the corresponding package name not to be found:
There are three reasons that can cause the corresponding package name not to be found:
- The package is in the Python standard library. In this case, nothing is added to the output list.
- The package lacks metadata. In this case, a warning is raised.
- The package is not installed.
Expand Down
47 changes: 47 additions & 0 deletions deptry/notebook_import_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import json
import re
from pathlib import Path
from typing import List


class NotebookImportExtractor:
    """
    Class to extract import statements from a Jupyter notebook.

    The notebook is read as JSON and the source of every code cell is scanned one line at a time.
    A line is kept when it starts (without leading whitespace) with an ``import ...`` or
    ``from ... import ...`` statement; indented imports are therefore not picked up.
    """

    # Matches `import x`, `import x.y`, `from x import y` and `from x.y.z import w` at the very
    # start of a line. The dotted module path after `from` may have any number of components.
    _IMPORT_PATTERN = re.compile(r"^(?:from\s+\w+(?:\.\w+)*\s+)?import\s+[^\s,.]+")

    def __init__(self) -> None:
        pass

    def extract(self, path_to_ipynb: Path) -> List[str]:
        """
        Extract import statements from a Jupyter notebook and return them as a list of strings, where
        each element in the list is one of the import statements.

        Args:
            path_to_ipynb: Path to the .ipynb file to extract imports from
        """
        notebook = self._read_ipynb_file(path_to_ipynb)
        cells = self._keep_code_cells(notebook)
        import_statements = [self._extract_import_statements_from_cell(cell) for cell in cells]
        return self._flatten(import_statements)

    @staticmethod
    def _read_ipynb_file(path_to_ipynb: Path) -> dict:
        """Load the notebook file as JSON and return the resulting dictionary."""
        # Decode as UTF-8 explicitly so the result does not depend on the platform's default encoding.
        with open(path_to_ipynb, "r", encoding="utf-8") as f:
            return json.load(f)

    @staticmethod
    def _keep_code_cells(notebook: dict) -> List[dict]:
        """Return only the cells whose ``cell_type`` is ``"code"``."""
        return [cell for cell in notebook["cells"] if cell["cell_type"] == "code"]

    @classmethod
    def _contains_import_statements(cls, line: str) -> bool:
        """Return True if the line starts with an import statement."""
        return cls._IMPORT_PATTERN.search(line) is not None

    def _extract_import_statements_from_cell(self, cell: dict) -> List[str]:
        """Return the lines of the cell's source that start with an import statement."""
        source = cell["source"]
        # A cell's source may be stored as one multi-line string instead of a list of lines.
        # Split it so that lines are iterated rather than single characters.
        if isinstance(source, str):
            source = source.splitlines(keepends=True)
        return [line for line in source if self._contains_import_statements(line)]

    @staticmethod
    def _flatten(list_of_lists: List[List]) -> List:
        """Flatten a list of lists into a single list, preserving order."""
        return [item for sublist in list_of_lists for item in sublist]
29 changes: 15 additions & 14 deletions deptry/python_file_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,29 @@
class PythonFileFinder:
"""
Get a list of all .py and .ipynb files recursively within a directory.
If ignore_notebooks is set to True, .ipynb files are ignored and only .py files are returned.
"""

def __init__(self, ignore_directories: List[str] = [".venv"], include_ipynb: bool = False) -> None:
def __init__(self, ignore_directories: List[str] = [".venv"], ignore_notebooks: bool = False) -> None:
self.ignore_directories = ignore_directories
self.include_ipynb = include_ipynb
self.ignore_notebooks = ignore_notebooks

def get_list_of_python_files(self):
all_py_files = self._get_all_python_files()
if self.include_ipynb:
all_py_files = self._add_ipynb_files(all_py_files)
all_py_files = self._remove_ignore_directories(all_py_files)
def get_all_python_files_in(self, directory: Path):
all_python_files = self._get_all_py_files_in(directory)
if not self.ignore_notebooks:
all_python_files += self._get_all_ipynb_files_in(directory)
all_python_files = self._remove_directories_to_ignore(all_python_files)
nl = "\n"
logging.debug(f"Python files to scan for imports:\n{nl.join([str(x) for x in all_py_files])}\n")
return all_py_files
logging.debug(f"Python files to scan for imports:\n{nl.join([str(x) for x in all_python_files])}\n")
return all_python_files

def _get_all_python_files(self) -> List[Path]:
return [path for path in Path(".").rglob("*.py")]
def _get_all_py_files_in(self, directory: Path) -> List[Path]:
return [path for path in directory.rglob("*.py")]

def _add_ipynb_files(self, all_py_files: List[Path]) -> List[Path]:
return all_py_files + [path for path in Path(".").rglob("*.ipynb")]
def _get_all_ipynb_files_in(self, directory: Path) -> List[Path]:
return [path for path in directory.rglob("*.ipynb")]

def _remove_ignore_directories(self, all_py_files: List[Path]) -> List[Path]:
def _remove_directories_to_ignore(self, all_py_files: List[Path]) -> List[Path]:
return [
path
for path in all_py_files
Expand Down
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

---

_deptry_ is a command line tool to check for unused dependencies in a poetry managed python project. It does so by scanning the imported modules within all `.py` files in
_deptry_ is a command line tool to check for unused dependencies in a poetry managed Python project. It does so by scanning the imported modules within all Python files in
a directory and its subdirectories, and comparing those to the dependencies listed in `pyproject.toml`.

## Installation and usage
Expand Down
11 changes: 10 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,22 @@ deptry check -i pandas -i numpy

## Ignore directories

_deptry_ scans the working directory and it's subdirectories recursively for `.py` files to scan for import statements. By default,
_deptry_ scans the working directory and its subdirectories recursively for `.py` and `.ipynb` files to scan for import statements. By default,
the `.venv` directory is ignored. To ignore other directories, use the `-id` flag. Note that this overwrites the default, so to ignore
both the `.venv` directory and another directory, use the flag twice:

```sh
deptry check -id .venv -id other_directory
```

## Ignore notebooks

By default, _deptry_ scans the working directory for `.py` and `.ipynb` files to check for import statements. To ignore `.ipynb` files, use the `--ignore-notebooks` flag:

```sh
deptry check --ignore-notebooks
```

## pyproject.toml

_deptry_ can also be configured through `pyproject.toml`. An example `pyproject.toml` entry for `deptry` looks as follows:
Expand All @@ -64,6 +72,7 @@ ignore_directories = [
'.venv',
'other_directory'
]
ignore_notebooks = false
```

## Lookup hierarchy
Expand Down
Loading

0 comments on commit 572f670

Please sign in to comment.