From 7100b864d2acd29035ababbf303929743d004ba8 Mon Sep 17 00:00:00 2001 From: Nok Lam Chan Date: Mon, 3 Feb 2025 23:33:55 +0000 Subject: [PATCH] add notes --- README.md | 14 +++---- src/ruff_in_python/cache.py | 0 src/ruff_in_python/check.py | 76 +++++++++++++++++++++++++++++++++++ src/ruff_in_python/lib.py | 0 src/ruff_in_python/linter.py | 36 +++++++++++++++++ src/ruff_in_python/message.py | 13 +++++- src/ruff_in_python/parser.py | 3 +- tests/bar.py | 3 +- 8 files changed, 133 insertions(+), 12 deletions(-) delete mode 100644 src/ruff_in_python/cache.py create mode 100644 src/ruff_in_python/check.py delete mode 100644 src/ruff_in_python/lib.py diff --git a/README.md b/README.md index c9b6bbb..cede5a9 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,7 @@ # learn-rust-by-building-ruff -## Rebuild with Python -Components: -- [ ] Tokenizer (Lexer) -- [ ] Parser +## The First Version of Ruff +These are the main files included in the first version of Ruff; they power only two specific rules. While it cannot be used in a meaningful way, it provides an easy way to understand how `ruff` (or linters in general) works without getting into all the details that are not so important for education. 
main.rs cache.rs @@ -27,16 +25,16 @@ anyhow: Flexible concrete Error type built on std::error::Error ## Libraries I used for the Python verrsion logging: std logging library -?: parallel computation typer: cli argument parsing -?: serialising ast: Python native ast library rich: color terminal os/pathlib: walking directory -?: error pytest: write simple tests -## Rebuild with Rust +## Next Step +- Implement Cache +- Implement parallel processing +- Reimplement in Rust Reference: Blog: https://compileralchemy.substack.com/p/ruff-internals-of-a-rust-backed-python diff --git a/src/ruff_in_python/cache.py b/src/ruff_in_python/cache.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/ruff_in_python/check.py b/src/ruff_in_python/check.py new file mode 100644 index 0000000..648ee9d --- /dev/null +++ b/src/ruff_in_python/check.py @@ -0,0 +1,76 @@ +from pathlib import Path +from .message import IfTuple, ImportStarUsage, Location, Message +import ast + +def check_statement(path: Path, stmt: ast.stmt) -> list[Message]: + """recursive function to parse statement""" + messages = [] + + match stmt: + case ast.FunctionDef() | ast.AsyncFunctionDef() | ast.ClassDef(): + for body_stmt in stmt.body: + messages.extend(check_statement(path, body_stmt)) + case ast.For() | ast.AsyncFor() | ast.While(): + for body_stmt in stmt.body: + messages.extend(check_statement(path, body_stmt)) + for orelse_stmt in stmt.orelse: + messages.extend(check_statement(path, orelse_stmt)) + case ( + ast.Return() + | ast.Delete() + | ast.Assign() + | ast.AugAssign() + | ast.AnnAssign() + | ast.Raise() + | ast.Assert() + | ast.Import() + | ast.Global() + | ast.Nonlocal() + | ast.Expr() + | ast.Pass() + | ast.Break() + | ast.Continue() + ): + pass # Do nothing for these statement types + case ast.If(): + # Check if the test is a tuple + if isinstance(stmt.test, ast.Tuple): # Assuming node is a tuple type + messages.append( + IfTuple( + filename=path, + location=Location(row=stmt.lineno, 
column=stmt.col_offset), + ) + ) + for body_stmt in stmt.body: + messages.extend(check_statement(path, body_stmt)) + for orelse_stmt in stmt.orelse: + messages.extend(check_statement(path, orelse_stmt)) + + case ast.With() | ast.AsyncWith(): + for body_stmt in stmt.body: + messages.extend(check_statement(path, body_stmt)) + case ast.ImportFrom(): + print(123) + for alias in stmt.names: + if alias.name == "*": + messages.append( + ImportStarUsage( + filename=path, + location=Location(row=stmt.lineno, column=stmt.col_offset), + ) + ) + case ast.ImportFrom() | ast.Try(): + raise NotImplementedError + case _: + print("BUG!: ", type(stmt)) + # Add more elif clauses for other statement types... + + return messages + + +def check_ast(path: Path, python_ast: list[ast.stmt]) -> list[Message]: + messages = [] + for stmt in python_ast: + messages.extend(check_statement(path, stmt)) + print("All Messages:", messages) + return messages diff --git a/src/ruff_in_python/lib.py b/src/ruff_in_python/lib.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/ruff_in_python/linter.py b/src/ruff_in_python/linter.py index e69de29..84b334d 100644 --- a/src/ruff_in_python/linter.py +++ b/src/ruff_in_python/linter.py @@ -0,0 +1,36 @@ +from dataclasses import dataclass +from pathlib import Path +from ruff_in_python.message import Message +from ruff_in_python.parser import parse_file +from ruff_in_python.check import check_ast + + +@dataclass +class CacheMetadata: + size: int + mtime: int + + +@dataclass +class CheckResult: + metadata: CacheMetadata + messages: list[Message] + + def check_path(self, path: Path) -> list[Message]: + # TODO: skip cache + + # Run the linter + python_ast = parse_file(path) + messages = check_ast(path, python_ast) + + # TODO: set cache + + return messages + + +if __name__ == "__main__": + result = CheckResult("dummy", []) + result.check_path("tests/foo.py") + + result = CheckResult("dummy", []) + result.check_path("tests/bar.py") diff --git 
a/src/ruff_in_python/message.py b/src/ruff_in_python/message.py index 8572239..e49cc07 100644 --- a/src/ruff_in_python/message.py +++ b/src/ruff_in_python/message.py @@ -1,3 +1,5 @@ +""" "Messages are the warnings that you expected to see from linter.""" + from dataclasses import dataclass from pathlib import Path from typing import Union @@ -5,11 +7,13 @@ from rich.text import Text + @dataclass class Location: row: int column: int + @dataclass class Message(ABC): filename: Path @@ -35,6 +39,8 @@ def richify(self) -> Text: text.append(f"\t{self.code}", style="bold red") text.append(f"\t{self.body}") return text + + @dataclass class ImportStarUsage(Message): @property @@ -45,6 +51,7 @@ def code(self) -> str: def body(self) -> str: return "Unable to detect undefined names" + @dataclass class IfTuple(Message): @property @@ -55,9 +62,11 @@ def code(self) -> str: def body(self) -> str: return "If test is a tuple, which is always `True`" + MessageType = Union[ImportStarUsage, IfTuple] if __name__ == "__main__": from rich import print - m1 = IfTuple(Path("some_path"),Location(1,2)) - print(m1.richify()) \ No newline at end of file + + m1 = IfTuple(Path("some_path"), Location(1, 2)) + print(m1.richify()) diff --git a/src/ruff_in_python/parser.py b/src/ruff_in_python/parser.py index 3dac793..4d2622d 100644 --- a/src/ruff_in_python/parser.py +++ b/src/ruff_in_python/parser.py @@ -12,4 +12,5 @@ def pretty_print_ast(self): def parse_file(path: Path): with open(path) as f: - return ast.parse(f.read()) + res = ast.parse(f.read()) + return res.body diff --git a/tests/bar.py b/tests/bar.py index d0c5c49..ac66900 100644 --- a/tests/bar.py +++ b/tests/bar.py @@ -1,5 +1,6 @@ if (1, 2): - pass + if (3, 4): + pass for _ in range(5): if True: