Skip to content

Commit

Permalink
Begin katalin
Browse files Browse the repository at this point in the history
  • Loading branch information
kislyuk committed Feb 12, 2024
1 parent ce2cc43 commit d5d5fe2
Show file tree
Hide file tree
Showing 6 changed files with 331 additions and 24 deletions.
4 changes: 0 additions & 4 deletions .github/scripts/build-ghautodoc.py

This file was deleted.

34 changes: 14 additions & 20 deletions .github/workflows/ghautodoc.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
name: ghautodoc
on:
workflow_dispatch:
pull_request:
defaults:
run: { shell: bash }

jobs:
on: [pull_request]

permissions:
contents: read
pull-requests: write

build:
None
strategy:
fail-fast: false
matrix: { os: [ubuntu] }
runs-on: ${{ matrix.os }}-latest
jobs:
ci:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- uses: actions/setup-python@v2
with: {python-version: '3.8'}
- name: Run script
env:
CONTEXT_GITHUB: ${{ toJson(github) }}
run: |
pip install -Uq ghapi
python .github/scripts/build-ghautodoc.py
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- uses: kislyuk/katalin@v1
with:
openai-api-key: ${{secrets.OPENAI_API_KEY}}
40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# katalin GitHub action

This is a GitHub action that generates LLM-assisted suggestions for improving Python code in PRs, and posts them as PR
comments.

## Inputs

### `openai-api-key`

**Required** The OpenAI API key to use.

### `enabled-advisors`

A newline-separated list of advisor modules to enable. Available advisors are:

* `docstrings`: Provides docstring suggestions for undocumented Python modules, functions, classes, and methods.
* `security`: Provides comments regarding potential security concerns.
* `logic-check`: Identifies possible logic errors.

<!--
### `custom-prompts`
A newline-separated list of colon-separated `node:prompt` pairs. TODO
-->

## Outputs

None

## Example usage

```yaml
uses: kislyuk/katalin@v1
with:
openai-api-key: ${{secrets.OPENAI_API_KEY}}
enabled-advisors: |-
docstrings
security
logic-check
```
35 changes: 35 additions & 0 deletions action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: Python Code Advisor
description: Comments on PRs with suggested documentation, security, and logic improvements
inputs:
openai-api-key:
description: The OpenAI API key used to create an authenticated OpenAI API client
required: true
openai-model-name:
description: The OpenAI model to use to generate suggestions
default: gpt-3.5-turbo-0125
enabled-advisors:
description: List of advisor modules to enable
default: |-
docstrings
security
logic-check
required: true

defaults:
run:
shell: bash

runs:
using: "composite"
steps:
- run: |
pip install -r "${{github.action_path}}/requirements.txt"
python "${{github.action_path}}/main.py"
shell: bash
env:
GITHUB_TOKEN: ${{ github.token }}
GITHUB_EVENT: ${{ toJson(github.event) }}
OPENAI_API_KEY: ${{ inputs.openai-api-key }}
OPENAI_MODEL_NAME: ${{ inputs.openai-model-name }}
ENABLED_ADVISORS: ${{ inputs.enabled-advisors }}
if: ${{ ! contains(github.event.pull_request.labels.*.name, 'skip-llm-advisors') }}
239 changes: 239 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
"""
python-code-advisor: a GitHub Action to generate LLM-assisted pull request suggestions.
See README.md for more information.
"""
import ast
import json
import logging
import os
import textwrap
from collections import defaultdict

import requests
from openai import OpenAI
from unidiff import PatchSet

# Configure root logging at INFO so the action's progress messages are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Prompt template passed to get_suggested_docstring() and filled in with str.format().
# Placeholders: {content} (full source of the file), {documentable_name} and
# {documentable_type} (the "name" and "type" keys of a documentable annotation).
# The file content is embedded in a ``` fence, which is why scan_diff() skips any
# source file that itself contains ```.
prompt = """
Given the following Python file:
```
{content}
```
please provide a concise Python docstring for the {documentable_name} {documentable_type}, with a human readable description of the purpose of the {documentable_type}, and a Sphinx annotation of its input parameters and output value. Provide the text of the docstring directly, without any quotation marks or method signature.
"""

# Body of the pull request review comment posted by suggest_docstring().
# Placeholders: {original_line} (text of the diff line the comment is attached to)
# and {body} (the generated docstring); both are placed inside a ```suggestion fence.
SUGGESTION_TEMPLATE = """
#### _Suggested documentation improvement_
It looks like this class, method, or function has no docstring.
```suggestion
{original_line}{body}
```
_You can edit or replace the proposed docstring before committing it by clicking the "..." menu._
"""

# OpenAI client used by get_suggested_docstring(); constructed at import time with no
# explicit arguments (presumably it picks up OPENAI_API_KEY from the environment,
# which action.yml exports -- confirm against the openai package documentation).
openai_client = OpenAI()
# Model name passed to the chat completion call; overridable via OPENAI_MODEL_NAME.
openai_model_name = os.environ.get("OPENAI_MODEL_NAME", "gpt-3.5-turbo-0125")
# Placeholders read by suggest_docstring(); the ``__main__`` section at the bottom of
# this file rebinds them from the GitHub event payload and token before scanning.
pr_url = ""
pr_head_sha = ""
github_headers = {}


def get_diff(pr_url: str, headers: dict) -> str:
    """
    Fetch the pull request in diff form.

    :param pr_url: GitHub API URL of the pull request.
    :param headers: Base request headers; a copy is sent with ``Accept`` set to the
        GitHub diff media type, so the caller's dict is not modified.
    :returns: The response body as text.
    :raises requests.HTTPError: If the response carries an error status.
    """
    diff_headers = dict(headers)
    diff_headers["Accept"] = "application/vnd.github.v3.diff"
    response = requests.get(pr_url, headers=diff_headers, timeout=30)
    response.raise_for_status()
    return response.text


def get_files(pr_url: str, headers: dict) -> list:
    """
    Fetch the ``/files`` sub-resource of the pull request.

    :param pr_url: GitHub API URL of the pull request.
    :param headers: Request headers to send.
    :returns: The decoded JSON body of the response.
    :raises requests.HTTPError: If the response carries an error status.
    """
    files_url = f"{pr_url}/files"
    response = requests.get(files_url, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()


def add_comment(
    pr_url: str,
    headers: dict,
    body: str,
    commit_id: str,
    path: str,
    line: int,
    side: str = "RIGHT",
):
    """
    Post a comment to the ``/comments`` sub-resource of the pull request.

    :param pr_url: GitHub API URL of the pull request.
    :param headers: Request headers to send.
    :param body: Text of the comment.
    :param commit_id: Commit SHA the comment refers to.
    :param path: Path of the file being commented on.
    :param line: Line number the comment is attached to.
    :param side: Side of the diff the line belongs to; defaults to ``"RIGHT"``.
    :returns: The decoded JSON body of the response.
    :raises requests.HTTPError: If the response carries an error status.
    """
    payload = {
        "body": body,
        "commit_id": commit_id,
        "path": path,
        "line": line,
        "side": side,
    }
    response = requests.post(
        f"{pr_url}/comments", headers=headers, json=payload, timeout=30
    )
    # Log the raw response before checking the status so failures are visible in the log.
    logger.info(response.text)
    response.raise_for_status()
    return response.json()


def get_suggested_docstring(prompt, **format_args):
    """
    Ask the configured OpenAI model for a docstring and format it as source text.

    :param prompt: Prompt template; formatted with ``format_args`` via ``str.format``.
    :param format_args: Values for the placeholders in ``prompt``.
    :returns: A triple-quoted docstring block whose quotes and body are both indented
        by four spaces, with body lines wrapped to 116 columns before indenting.
    """
    indent = " " * 4
    chat_completion = openai_client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt.format(**format_args),
            }
        ],
        model=openai_model_name,
    )
    # The message content may be absent; treat that as empty text instead of
    # failing on ``None.replace``.
    raw_text = chat_completion.choices[0].message.content or ""
    # Drop any triple quotes the model added, then surrounding blank space.
    raw_text = raw_text.replace('"""', "").strip()

    # Wrap each line on its own: textwrap.fill() treats its input as one paragraph,
    # so embedded newlines would otherwise throw off the width accounting.
    wrapped_lines = []
    for text_line in raw_text.splitlines():
        if not text_line.strip():
            wrapped_lines.append("")
            continue
        wrapped_lines.append(
            textwrap.fill(text_line, replace_whitespace=False, width=116)
        )

    body = textwrap.indent("\n".join(wrapped_lines), indent)
    # The quote lines use the same indent as the body so the block is valid when
    # committed as the first statement of a definition.
    return f'{indent}"""\n{body}\n{indent}"""'


def suggest_docstring(filename, line, documentable, source):
    """
    Generate a docstring for one definition and post it as a suggestion comment.

    Reads the module-level ``pr_url``, ``pr_head_sha`` and ``github_headers``.

    :param filename: Path of the file the comment is attached to.
    :param line: Diff line object to anchor the comment on; its ``value`` is repeated
        at the top of the suggestion and its ``target_line_no`` is the comment line.
    :param documentable: Annotation dict providing the ``"name"`` and ``"type"`` keys.
    :param source: Full source text of the file, embedded in the prompt.
    """
    logger.info("Processing: %s", line)
    docstring_text = get_suggested_docstring(
        prompt,
        content=source,
        documentable_name=documentable["name"],
        documentable_type=documentable["type"],
    )
    target_lineno = line.target_line_no
    logger.info("Will add comment at %s:%s", filename, target_lineno)
    comment_body = SUGGESTION_TEMPLATE.format(
        original_line=line.value, body=docstring_text
    )
    add_comment(
        pr_url=pr_url,
        headers=github_headers,
        body=comment_body,
        commit_id=pr_head_sha,
        path=filename,
        line=target_lineno,
    )


def has_docstring(node):
    """
    Tell whether the first statement in ``node.body`` is a string literal expression.

    :param node: An AST node with a non-empty ``body`` list.
    :returns: ``True`` if the first body statement is an ``ast.Expr`` wrapping a
        string ``ast.Constant``, otherwise ``False``.
    """
    first_stmt = node.body[0]
    return (
        isinstance(first_stmt, ast.Expr)
        and isinstance(first_stmt.value, ast.Constant)
        and isinstance(first_stmt.value.value, str)
    )


def get_node_annotation(node, node_type):
    """
    Build the annotation dict describing one documentable AST node.

    :param node: AST node with ``name`` and a non-empty ``body``.
    :param node_type: Label stored under ``"type"`` (callers in this file pass
        ``"function"``, ``"class"`` or ``"method"``).
    :returns: Dict with keys ``"type"``, ``"name"``, ``"has_docstring"`` and
        ``"first_body_lineno"`` (line number of the first body statement).
    """
    annotation = {"type": node_type}
    annotation["name"] = node.name
    annotation["has_docstring"] = has_docstring(node)
    annotation["first_body_lineno"] = node.body[0].lineno
    return annotation


def get_documentables(module_node):
    """
    Collect annotations for the public definitions of a parsed module.

    Covers top-level functions, top-level classes, and the methods directly inside
    those classes. Any definition whose name starts with ``_`` is skipped; methods
    of a skipped class are not visited.

    :param module_node: Module AST whose ``body`` is scanned.
    :returns: ``defaultdict`` mapping each definition's ``lineno`` to the dict
        produced by ``get_node_annotation``.
    """

    def is_public(candidate, kind):
        # True for nodes of the given kind whose name has no leading underscore.
        return isinstance(candidate, kind) and not candidate.name.startswith("_")

    found = defaultdict(dict)
    for top_level in module_node.body:
        if is_public(top_level, ast.FunctionDef):
            found[top_level.lineno] = get_node_annotation(top_level, "function")
            continue
        if not is_public(top_level, ast.ClassDef):
            continue
        found[top_level.lineno] = get_node_annotation(top_level, "class")
        for member in top_level.body:
            if is_public(member, ast.FunctionDef):
                found[member.lineno] = get_node_annotation(member, "method")
    return found


def get_docstring_lineno(documentable, all_lines):
    """
    Find the line after which a docstring suggestion should be anchored.

    Starts at the line just before the first body statement and walks upwards past
    lines whose stripped text starts with ``#``.

    :param documentable: Annotation dict providing ``"first_body_lineno"``.
    :param all_lines: Mapping of line number to a line object exposing ``value``.
    :returns: The first non-comment line number found, or ``None`` if the walk
        reaches a line number that is not in ``all_lines``.
    """
    candidate = documentable["first_body_lineno"] - 1
    while candidate in all_lines:
        if not all_lines[candidate].value.strip().startswith("#"):
            return candidate
        candidate -= 1
    return None


def scan_diff(pr_url, headers):
    """
    Scan the pull request diff and suggest docstrings for newly added definitions.

    For every patched ``.py`` file outside ``tests``, ``migrations`` and ``backfills``
    directories, the checked-out file is parsed and each added ``def``/``class`` line
    that matches an undocumented public definition gets a suggestion comment.

    :param pr_url: GitHub API URL of the pull request.
    :param headers: Request headers used to fetch the diff.
    """
    skipped_dirs = ("tests", "migrations", "backfills")
    for patch in PatchSet(get_diff(pr_url, headers)):
        logger.info("Processing patch %s", patch.__dict__)
        if not patch.target_file.endswith(".py"):
            continue
        if any(f"/{x}/" in patch.target_file for x in skipped_dirs):
            continue
        # Drop the two-character prefix of the diff's target path (presumably "b/")
        # to get the path of the file in the working tree.
        filename = patch.target_file[2:]
        with open(filename, "r", encoding="utf-8") as f:
            source = f.read()
        # The prompt embeds the source inside a ``` fence, so a file that contains
        # the fence marker itself is skipped.
        if "```" in source:
            continue
        try:
            module = ast.parse(source)
        except Exception as e:
            # Best effort: a file that cannot be parsed is logged and skipped.
            logger.info("Error parsing %s: %s", patch.target_file, e)
            continue
        documentables = get_documentables(module)

        # Index every added line first: the anchor for a definition can lie after
        # its def/class line, possibly in a later hunk.
        all_lines = {
            line.target_line_no: line
            for hunk in patch
            for line in hunk
            if line.line_type == "+"
        }

        for hunk in patch:
            logger.info("Processing hunk %s", hunk.__dict__)
            for line in hunk:
                if line.line_type != "+":
                    continue
                # NOTE(review): this prefix test looks like it only matches unindented
                # definitions, so methods collected by get_documentables() would never
                # pass it -- confirm whether that is intended.
                if not line.value.startswith(("def ", "class ")):
                    continue
                if line.target_line_no not in documentables:
                    continue
                documentable = documentables[line.target_line_no]
                if documentable["has_docstring"]:
                    continue
                docstring_lineno = get_docstring_lineno(documentable, all_lines)
                if docstring_lineno is None:
                    # The line preceding the body was not added by this PR (for
                    # example an untouched tail of a multi-line signature), so there
                    # is no added line to anchor a suggestion on.
                    logger.info(
                        "Skipping %s %s in %s: no added line to attach a suggestion to",
                        documentable["type"],
                        documentable["name"],
                        filename,
                    )
                    continue
                suggest_docstring(
                    filename,
                    all_lines[docstring_lineno],
                    documentable,
                    source,
                )


if __name__ == "__main__":
    # Entry point: read the GitHub event and action inputs from the environment,
    # work out which advisors are enabled, and run the docstrings advisor if it is.
    logger.info("python-code-advisor starting")
    # The assignments to github_headers, pr_url and pr_head_sha rebind the
    # module-level placeholders that suggest_docstring() reads.
    github_headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
    }

    github_event = json.loads(os.environ["GITHUB_EVENT"])
    pull_request = github_event["pull_request"]
    pr_url = pull_request["url"]
    pr_head_sha = pull_request["head"]["sha"]
    pr_labels = {label["name"] for label in pull_request["labels"]}

    # One advisor per line; surrounding whitespace and blank lines are ignored.
    requested_advisors = [
        entry.strip()
        for entry in os.environ["ENABLED_ADVISORS"].splitlines()
        if entry.strip()
    ]
    # A "skip-<advisor>" label on the PR disables that advisor. Build a new list
    # rather than removing items from the list being iterated.
    enabled_advisors = [
        advisor
        for advisor in requested_advisors
        if f"skip-{advisor}" not in pr_labels
    ]
    logger.info("Enabled advisors: %s", enabled_advisors)
    if "docstrings" in enabled_advisors:
        scan_diff(pr_url, headers=github_headers)
    else:
        logger.info("Skipping docstrings advisor")
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
requests
unidiff
openai

0 comments on commit d5d5fe2

Please sign in to comment.