diff --git a/.github/scripts/build-ghautodoc.py b/.github/scripts/build-ghautodoc.py deleted file mode 100644 index d812acd..0000000 --- a/.github/scripts/build-ghautodoc.py +++ /dev/null @@ -1,4 +0,0 @@ -from fastcore.all import * -from ghapi import * - -print("ghautodoc") diff --git a/.github/workflows/ghautodoc.yml b/.github/workflows/ghautodoc.yml index 9803351..8d2895b 100644 --- a/.github/workflows/ghautodoc.yml +++ b/.github/workflows/ghautodoc.yml @@ -1,25 +1,19 @@ name: ghautodoc -on: - workflow_dispatch: - pull_request: -defaults: - run: { shell: bash } -jobs: +on: [pull_request] + +permissions: + contents: read + pull-requests: write - build: -None - strategy: - fail-fast: false - matrix: { os: [ubuntu] } - runs-on: ${{ matrix.os }}-latest +jobs: + ci: + runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - - uses: actions/setup-python@v2 - with: {python-version: '3.8'} - - name: Run script - env: - CONTEXT_GITHUB: ${{ toJson(github) }} - run: | - pip install -Uq ghapi - python .github/scripts/build-ghautodoc.py + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + - uses: kislyuk/katalin@v1 + with: + openai-api-key: ${{secrets.OPENAI_API_KEY}} diff --git a/README.md b/README.md new file mode 100644 index 0000000..c2f762f --- /dev/null +++ b/README.md @@ -0,0 +1,40 @@ +# katalin GitHub action + +This is a GitHub action that generates LLM-assisted suggestions for improving Python code in PRs, and posts them as PR +comments. + +## Inputs + +### `openai-api-key` + +**Required** The OpenAI API key to use. + +### `enabled-advisors` + +A newline-separated list of advisor modules to enable. Available advisors are: + +* `docstrings`: Provides docstring suggestions for undocumented Python modules, functions, classes, and methods. +* `security`: Provides comments regarding potential security concerns. +* `logic-check`: Identifies possible logic errors. 
+
+
+
+## Outputs
+
+None
+
+## Example usage
+
+```yaml
+uses: kislyuk/katalin@v1
+with:
+  openai-api-key: ${{secrets.OPENAI_API_KEY}}
+  enabled-advisors: |-
+    docstrings
+    security
+    logic-check
+```
diff --git a/action.yml b/action.yml
new file mode 100644
index 0000000..9126bba
--- /dev/null
+++ b/action.yml
@@ -0,0 +1,35 @@
+name: Python Code Advisor
+description: Comments on PRs with suggested documentation, security, and logic improvements
+inputs:
+  openai-api-key:
+    description: The OpenAI API key used to create an authenticated OpenAI API client
+    required: true
+  openai-model-name:
+    description: The OpenAI model to use to generate suggestions
+    default: gpt-3.5-turbo-0125
+  enabled-advisors:
+    description: List of advisor modules to enable
+    default: |-
+      docstrings
+      security
+      logic-check
+    required: true
+
+defaults:
+  run:
+    shell: bash
+
+runs:
+  using: "composite"
+  steps:
+    - run: |
+        pip install -r "${{github.action_path}}/requirements.txt"
+        python "${{github.action_path}}/main.py"
+      shell: bash
+      env:
+        GITHUB_TOKEN: ${{ github.token }}
+        GITHUB_EVENT: ${{ toJson(github.event) }}
+        OPENAI_API_KEY: ${{ inputs.openai-api-key }}
+        OPENAI_MODEL_NAME: ${{ inputs.openai-model-name }}
+        ENABLED_ADVISORS: ${{ inputs.enabled-advisors }}
+      if: ${{ ! contains(github.event.pull_request.labels.*.name, 'skip-llm-advisors') }}
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..3a44dc2
--- /dev/null
+++ b/main.py
@@ -0,0 +1,317 @@
+"""
+python-code-advisor: a GitHub Action to generate LLM-assisted pull request suggestions.
+
+See README.md for more information.
+"""
+import ast
+import json
+import logging
+import os
+import textwrap
+from collections import defaultdict
+
+import requests
+from openai import OpenAI
+from unidiff import PatchSet
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Widest line a suggested docstring may occupy, indentation included.
+MAX_LINE_WIDTH = 120
+# What a definition line starts with once its indentation is stripped.
+DEFINITION_PREFIXES = ("def ", "async def ", "class ")
+# Files under these directories are never offered docstring suggestions.
+SKIPPED_DIRECTORIES = ("tests", "migrations", "backfills")
+# AST node types that define a function or method.
+FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)
+
+prompt = """
+Given the following Python file:
+```
+{content}
+```
+please provide a concise Python docstring for the {documentable_name} {documentable_type}, with a human readable description of the purpose of the {documentable_type}, and a Sphinx annotation of its input parameters and output value. Provide the text of the docstring directly, without any quotation marks or method signature.
+"""
+
+SUGGESTION_TEMPLATE = """
+#### _Suggested documentation improvement_
+It looks like this class, method, or function has no docstring.
+```suggestion
+{original_line}{body}
+```
+_You can edit or replace the proposed docstring before committing it by clicking the "..." menu._
+"""
+
+openai_client = OpenAI()
+# Fall back to the default when the variable is unset or set to an empty string.
+openai_model_name = os.environ.get("OPENAI_MODEL_NAME") or "gpt-3.5-turbo-0125"
+# Pull request context read by suggest_docstring(); filled in by the entry point below.
+pr_url = ""
+pr_head_sha = ""
+github_headers = {}
+
+
+def get_diff(pr_url: str, headers: dict) -> str:
+    """Download the unified diff of a pull request.
+
+    :param pr_url: GitHub API URL of the pull request.
+    :param headers: Request headers; ``Accept`` is replaced to ask for the diff media type.
+    :returns: The diff as text.
+    :raises requests.HTTPError: If the response has an error status.
+    """
+    res = requests.get(
+        pr_url,
+        headers=dict(headers, Accept="application/vnd.github.v3.diff"),
+        timeout=30,
+    )
+    res.raise_for_status()
+    return res.text
+
+
+def get_files(pr_url: str, headers: dict) -> list:
+    """Fetch the decoded JSON body of the pull request's ``/files`` endpoint.
+
+    :param pr_url: GitHub API URL of the pull request.
+    :param headers: Request headers.
+    :returns: The decoded JSON response of a single request (no pagination is attempted).
+    :raises requests.HTTPError: If the response has an error status.
+    """
+    res = requests.get(f"{pr_url}/files", headers=headers, timeout=30)
+    res.raise_for_status()
+    return res.json()
+
+
+def add_comment(
+    pr_url: str,
+    headers: dict,
+    body: str,
+    commit_id: str,
+    path: str,
+    line: int,
+    side: str = "RIGHT",
+):
+    """Post a review comment on one line of the pull request.
+
+    :param pr_url: GitHub API URL of the pull request.
+    :param headers: Request headers.
+    :param body: Markdown text of the comment.
+    :param commit_id: SHA of the commit the comment refers to.
+    :param path: Path of the commented file.
+    :param line: Line number the comment is attached to.
+    :param side: Side of the diff the line belongs to.
+    :returns: The decoded JSON response.
+    :raises requests.HTTPError: If the response has an error status.
+    """
+    res = requests.post(
+        f"{pr_url}/comments",
+        headers=headers,
+        json=dict(
+            body=body,
+            commit_id=commit_id,
+            path=path,
+            line=line,
+            side=side,
+        ),
+        timeout=30,
+    )
+    # Log before raising so that the error details in the response body are not lost.
+    logger.info(res.text)
+    res.raise_for_status()
+    return res.json()
+
+
+def get_suggested_docstring(prompt, *, indent=" " * 4, **format_args):
+    """Ask the OpenAI model for a docstring and format it as an indented triple-quoted block.
+
+    :param prompt: Prompt template, filled in with ``format_args``.
+    :param indent: Whitespace placed before every line of the returned docstring.
+    :param format_args: Values for the placeholders of ``prompt``.
+    :returns: The docstring source text, or ``None`` if the model returned no text.
+    """
+    chat_completion = openai_client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": prompt.format(**format_args),
+            }
+        ],
+        model=openai_model_name,
+    )
+    # The message content may be None, and stray quotes would end the docstring early.
+    text = (chat_completion.choices[0].message.content or "").replace('"""', "").strip()
+    if not text:
+        return None
+    # Wrap line by line: with replace_whitespace=False, textwrap.fill() counts embedded newlines as
+    # ordinary characters, which produces ragged output for multi-line replies.
+    width = max(MAX_LINE_WIDTH - len(indent), 40)
+    wrapped = "\n".join(textwrap.fill(paragraph, width=width) for paragraph in text.splitlines())
+    return f'{indent}"""\n{textwrap.indent(wrapped, indent)}\n{indent}"""'
+
+
+def suggest_docstring(filename, line, documentable, source):
+    """Generate a docstring for ``documentable`` and post it as a suggestion comment on ``line``.
+
+    :param filename: Path of the file within the repository.
+    :param line: Added diff line that the suggestion replaces; the docstring is appended after its text.
+    :param documentable: Annotation produced by :func:`get_node_annotation`.
+    :param source: Full source of the file, given to the model as context.
+    """
+    logger.info("Processing: %s", line)
+    suggested_docstring = get_suggested_docstring(
+        prompt,
+        # Indent like the body the docstring joins, so that methods get a valid suggestion too.
+        indent=" " * documentable.get("body_col_offset", 4),
+        content=source,
+        documentable_name=documentable["name"],
+        documentable_type=documentable["type"],
+    )
+    if suggested_docstring is None:
+        logger.info("No docstring was generated for %s", documentable["name"])
+        return
+    logger.info("Will add comment at %s:%s", filename, line.target_line_no)
+    add_comment(
+        pr_url=pr_url,
+        headers=github_headers,
+        body=SUGGESTION_TEMPLATE.format(original_line=line.value, body=suggested_docstring),
+        commit_id=pr_head_sha,
+        path=filename,
+        line=line.target_line_no,
+    )
+
+
+def has_docstring(node):
+    """Return whether a function, class, or module node starts with a docstring.
+
+    :param node: AST node to inspect.
+    :returns: ``True`` if the first statement of the node is a string literal.
+    """
+    return ast.get_docstring(node, clean=False) is not None
+
+
+def get_node_annotation(node, node_type):
+    """Describe a definition node with the fields the docstring advisor needs.
+
+    :param node: Function or class AST node.
+    :param node_type: Label used in the prompt: "function", "class", or "method".
+    :returns: A dict with the label, name, docstring flag, and position details of the node.
+    """
+    return {
+        "type": node_type,
+        "name": node.name,
+        "has_docstring": has_docstring(node),
+        "lineno": node.lineno,
+        "first_body_lineno": node.body[0].lineno,
+        "body_col_offset": node.body[0].col_offset,
+    }
+
+
+def get_documentables(module_node):
+    """Collect the public functions, classes, and methods defined in a module.
+
+    Names starting with an underscore are skipped, and so are the methods of skipped classes.
+
+    :param module_node: Parsed module.
+    :returns: Mapping from the line number of each definition to its annotation.
+    """
+    documentables = defaultdict(dict)
+    for node in module_node.body:
+        if not isinstance(node, (*FUNCTION_NODES, ast.ClassDef)) or node.name.startswith("_"):
+            continue
+        if not isinstance(node, ast.ClassDef):
+            documentables[node.lineno] = get_node_annotation(node, "function")
+            continue
+        documentables[node.lineno] = get_node_annotation(node, "class")
+        for subnode in node.body:
+            if isinstance(subnode, FUNCTION_NODES) and not subnode.name.startswith("_"):
+                documentables[subnode.lineno] = get_node_annotation(subnode, "method")
+    return documentables
+
+
+def get_docstring_lineno(documentable, all_lines):
+    """Find the added line after which the docstring of ``documentable`` should be inserted.
+
+    Starts on the line before the first body statement and walks up past comment lines.
+
+    :param documentable: Annotation produced by :func:`get_node_annotation`.
+    :param all_lines: Mapping from target line number to the diff line added there.
+    :returns: The line number, or ``None`` if the walk leaves the added lines or the body shares
+        a line with the definition.
+    """
+    lineno = documentable["first_body_lineno"] - 1
+    # A body on the definition line itself ("def f(): return 1") leaves no room for a docstring.
+    if lineno < documentable.get("lineno", lineno):
+        return None
+    while lineno in all_lines:
+        if not all_lines[lineno].value.strip().startswith("#"):
+            return lineno
+        lineno -= 1
+    return None
+
+
+def scan_diff(pr_url, headers):
+    """Post a docstring suggestion for every undocumented public definition added by the pull request.
+
+    Files are read from the current working directory.
+
+    :param pr_url: GitHub API URL of the pull request.
+    :param headers: Request headers for the GitHub API.
+    """
+    for patch in PatchSet(get_diff(pr_url, headers)):
+        logger.info("Processing patch %s", patch.target_file)
+        if not patch.target_file.endswith(".py"):
+            continue
+        if any(f"/{name}/" in patch.target_file for name in SKIPPED_DIRECTORIES):
+            continue
+        # Drop the "b/" prefix that git puts in front of target paths.
+        path = patch.target_file[2:]
+        with open(path, "r", encoding="utf-8") as f:
+            source = f.read()
+        # A code fence in the file would end the fenced block of the prompt early.
+        if "```" in source:
+            continue
+        try:
+            module = ast.parse(source)
+        except (SyntaxError, ValueError) as e:
+            logger.info("Error parsing %s: %s", patch.target_file, e)
+            continue
+        documentables = get_documentables(module)
+
+        all_lines = {line.target_line_no: line for hunk in patch for line in hunk if line.line_type == "+"}
+
+        for lineno, line in all_lines.items():
+            # Strip the indentation first: method definitions do not start at column 0.
+            if not line.value.lstrip().startswith(DEFINITION_PREFIXES):
+                continue
+            documentable = documentables.get(lineno)
+            if documentable is None or documentable["has_docstring"]:
+                continue
+            docstring_lineno = get_docstring_lineno(documentable, all_lines)
+            if docstring_lineno is None:
+                logger.info("Cannot place a docstring for %s in %s", documentable["name"], path)
+                continue
+            suggest_docstring(path, all_lines[docstring_lineno], documentable, source)
+
+
+if __name__ == "__main__":
+    logger.info("python-code-advisor starting")
+    github_headers = {
+        "Accept": "application/vnd.github+json",
+        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
+    }
+
+    github_event = json.loads(os.environ["GITHUB_EVENT"])
+    pr_url = github_event["pull_request"]["url"]
+    pr_head_sha = github_event["pull_request"]["head"]["sha"]
+    pr_labels = {label["name"] for label in github_event["pull_request"]["labels"]}
+    # Build a new list: removing items from the list being iterated makes the loop skip entries.
+    enabled_advisors = [
+        advisor
+        for advisor in map(str.strip, os.environ["ENABLED_ADVISORS"].splitlines())
+        if advisor and f"skip-{advisor}" not in pr_labels
+    ]
+    logger.info("Enabled advisors: %s", enabled_advisors)
+    if "docstrings" in enabled_advisors:
+        scan_diff(pr_url, headers=github_headers)
+    else:
+        logger.info("Skipping docstrings advisor")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..28cfe58
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+requests
+unidiff
+openai