Commit

feat: add pre-commit

leoguillaume committed Sep 17, 2024
1 parent 621c81e commit e3f4545
Showing 32 changed files with 423 additions and 527 deletions.
12 changes: 12 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,12 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.6.5
+    hooks:
+      # Run the linter.
+      - id: ruff
+        types_or: [ python, pyi ]
+        args: [ --fix ]
+      # Run the formatter.
+      - id: ruff-format
+        types_or: [ python, pyi ]
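This is the standard ruff pre-commit configuration: once `pre-commit install` has been run (added to CONTRIBUTING.md below), every commit runs ruff's linter with `--fix` and then its formatter over the staged Python files, and `pre-commit run --all-files` applies the same hooks to the whole repository — which presumably produced the sweeping quote-style and signature reflows in the files that follow.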
3 changes: 2 additions & 1 deletion CONTRIBUTING.md
@@ -22,7 +22,8 @@ feat(collections): collection name retriever
 2. In a Python virtual environment, install the Python packages listed in the *[pyproject.toml](./pyproject.toml)* file

 ```bash
-pip install ".[ui,app]"
+pip install ".[ui,app,dev]"
+pre-commit install
 ```

 # Tests
3 changes: 2 additions & 1 deletion app/endpoints/chat.py
@@ -15,6 +15,7 @@

 router = APIRouter()

+
 # @TODO: remove tooling from here
 @router.post("/chat/completions")
 async def chat_completions(request: ChatCompletionRequest, user: str = Security(check_api_key)) -> Union[ChatCompletion, ChatCompletionChunk]:
@@ -65,7 +66,7 @@ async def chat_completions(request: ChatCompletionRequest, user: str = Security(
         return ChatCompletion(**data)

     # stream case
-    async def forward_stream(url:str, headers:dict, request: dict):
+    async def forward_stream(url: str, headers: dict, request: dict):
         async with httpx.AsyncClient(timeout=20) as async_client:
             async with async_client.stream(method="POST", url=url, headers=headers, json=request) as response:
                 i = 0
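For context, here is a minimal, runnable sketch of the streaming pattern this hunk touches, assuming the upstream endpoint streams its response body; the chunk bookkeeping behind the `i` counter is elided, so this is an illustration rather than the project's exact code.

```python
import httpx


async def forward_stream(url: str, headers: dict, request: dict):
    # Open a streaming POST to the upstream model server and re-yield each
    # raw chunk as it arrives, so the API can proxy server-sent events.
    async with httpx.AsyncClient(timeout=20) as async_client:
        async with async_client.stream(method="POST", url=url, headers=headers, json=request) as response:
            async for chunk in response.aiter_raw():
                yield chunk
```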
1 change: 1 addition & 0 deletions app/endpoints/chunks.py
@@ -10,6 +10,7 @@

 router = APIRouter()

+
 # @TODO: add pagination
 @router.get("/chunks/{collection}/{chunk}")
 @router.post("/chunks/{collection}")
1 change: 1 addition & 0 deletions app/endpoints/collections.py
@@ -11,6 +11,7 @@

 router = APIRouter()

+
 # @TODO: remove get one collection and a /collections/search to similarity search (remove /tools)
 @router.get("/collections/{collection}")
 @router.get("/collections")
4 changes: 1 addition & 3 deletions app/endpoints/completions.py
@@ -9,9 +9,7 @@


 @router.post("/completions")
-async def completions(
-    request: CompletionRequest, user: str = Security(check_api_key)
-) -> Completions:
+async def completions(request: CompletionRequest, user: str = Security(check_api_key)) -> Completions:
     """
     Completion API similar to OpenAI's API.
     See https://platform.openai.com/docs/api-reference/completions/create for the API specification.
4 changes: 1 addition & 3 deletions app/endpoints/embeddings.py
@@ -9,9 +9,7 @@


 @router.post("/embeddings")
-async def embeddings(
-    request: EmbeddingsRequest, user: str = Security(check_api_key)
-) -> Embeddings:
+async def embeddings(request: EmbeddingsRequest, user: str = Security(check_api_key)) -> Embeddings:
     """
     Embedding API similar to OpenAI's API.
     See https://platform.openai.com/docs/api-reference/embeddings/create for the API specification.
5 changes: 2 additions & 3 deletions app/endpoints/models.py
@@ -9,11 +9,10 @@

 router = APIRouter()

+
 @router.get("/models/{model:path}")
 @router.get("/models")
-async def models(
-    model: Optional[str] = None, user: str = Security(check_api_key)
-) -> Union[Models, Model]:
+async def models(model: Optional[str] = None, user: str = Security(check_api_key)) -> Union[Models, Model]:
     """
     Model API similar to OpenAI's API.
     See https://platform.openai.com/docs/api-reference/models/list for the API specification.
4 changes: 3 additions & 1 deletion app/helpers/__init__.py
@@ -1,5 +1,7 @@
+from ._gristkeymanager import GristKeyManager
 from ._s3fileloader import S3FileLoader
 from ._textcleaner import TextCleaner
 from ._universalparser import UniversalParser
-from ._gristkeymanager import GristKeyManager
 from ._vectorstore import VectorStore
+
+__all__ = ["S3FileLoader", "TextCleaner", "GristKeyManager", "UniversalParser", "VectorStore"]
21 changes: 5 additions & 16 deletions app/helpers/_s3fileloader.py
@@ -1,15 +1,11 @@
 import os
 import tempfile
-from typing import TYPE_CHECKING, Any, Callable, List, Optional
-import magic
+from typing import Any, Callable, List, Optional

 from langchain_community.document_loaders.unstructured import UnstructuredBaseLoader

 from ._universalparser import UniversalParser

-if TYPE_CHECKING:
-    import botocore
-

 class S3FileLoader(UnstructuredBaseLoader):
     """Load from `Amazon AWS S3` files into Langchain documents."""
@@ -21,8 +17,8 @@ def __init__(
         mode: str = "single",
         post_processors: Optional[List[Callable]] = None,
         chunk_size: Optional[int],
-        chunk_overlap: Optional[int] ,
-        chunk_min_size: Optional[int] ,
+        chunk_overlap: Optional[int],
+        chunk_min_size: Optional[int],
         **unstructured_kwargs: Any,
     ):
         """Initialize loader.
@@ -40,11 +36,7 @@ def __init__(
         self.chunk_overlap = chunk_overlap
         self.chunk_min_size = chunk_min_size

-    def _get_elements(
-        self,
-        bucket: str,
-        file_id: str,
-    ) -> List:
+    def _get_elements(self, bucket: str, file_id: str) -> List:
         """Get elements.

         Args:
@@ -61,10 +53,7 @@ def _get_elements(

         # Returns a list of Langchain documents
         return self.parser.parse_and_chunk(
-            file_path=file_path,
-            chunk_size=self.chunk_size,
-            chunk_overlap=self.chunk_overlap,
-            chunk_min_size=self.chunk_min_size,
+            file_path=file_path, chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap, chunk_min_size=self.chunk_min_size
         )

     def _get_metadata(self, bucket, file_id) -> dict:
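Read together, the reformatted signatures imply roughly this construction pattern — a hypothetical wiring based only on the parameters visible in this diff; the chunking values are illustrative, not project defaults:

```python
from app.helpers import S3FileLoader

# Chunking parameters are forwarded verbatim to UniversalParser.parse_and_chunk.
loader = S3FileLoader(
    mode="single",
    post_processors=None,
    chunk_size=512,      # assumed: target chunk length
    chunk_overlap=64,    # assumed: overlap between consecutive chunks
    chunk_min_size=20,   # assumed: minimum size a chunk must reach to be kept
)
```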
7 changes: 3 additions & 4 deletions app/helpers/_textcleaner.py
@@ -3,7 +3,6 @@


 class TextCleaner:
-
     def __init__(self):
         pass

@@ -15,12 +14,12 @@ def clean_string(self, input_string):
             return input_string

         # Remove NUL bytes
-        input_string = input_string.replace('\x00', '')
+        input_string = input_string.replace("\x00", "")

         # Remove non-printable characters
-        input_string = re.sub(r'[\x00-\x1f\x7f-\x9f]', '', input_string)
+        input_string = re.sub(r"[\x00-\x1f\x7f-\x9f]", "", input_string)

         # Normalize Unicode characters to NFC (Normalization Form C)
-        input_string = unicodedata.normalize('NFC', input_string)
+        input_string = unicodedata.normalize("NFC", input_string)

         return input_string
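A quick demonstration of the cleaning pipeline above — a hypothetical snippet assuming `TextCleaner` is importable from `app.helpers`, as the `__init__.py` diff re-exports it:

```python
from app.helpers import TextCleaner

cleaner = TextCleaner()
raw = "Caf\x00e\u0301\x1f"  # NUL byte, combining acute accent, control character
print(cleaner.clean_string(raw))  # -> "Café": controls stripped, then NFC-composed
```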