
Commit

fix: remove rate-limiting for embeddings (#143)
Co-authored-by: leoguillaume <[email protected]>
leoguillaume and leoguillaumegouv authored Jan 17, 2025
1 parent e439024 commit 88cbba9
Showing 15 changed files with 21 additions and 37 deletions.
1 change: 0 additions & 1 deletion app/clients/_modelclients.py
@@ -95,7 +95,6 @@ def create_chat_completions(self, *args, **kwargs):
raise HTTPException(status_code=e.response.status_code, detail=json.loads(e.response.text)["message"])


- # @TODO : useless ?
def create_embeddings(self, *args, **kwargs):
"""
Custom method to overwrite OpenAI's create method to raise HTTPException from model API.
2 changes: 1 addition & 1 deletion app/clients/internet/_braveinternetclient.py
@@ -20,7 +20,7 @@ def get_result_urls(self, query: str, n: int = 3) -> List[str]:
response = requests.get(url=self.URL, headers=self.headers, params=params)
results = response.json().get("web", {}).get("results", [])
except Exception as e:
- logger.warning(msq=f"Brave Search API error: {str(e)}")
+ logger.warning(msg=f"Brave Search API error: {str(e)}")
results = []

return [result["url"].lower() for result in results]
5 changes: 1 addition & 4 deletions app/endpoints/chunks.py
@@ -5,15 +5,12 @@
from app.schemas.chunks import Chunks
from app.schemas.security import User
from app.utils.lifespan import clients
- from app.utils.security import check_api_key, check_rate_limit
- from app.utils.settings import settings
- from app.utils.lifespan import limiter
+ from app.utils.security import check_api_key

router = APIRouter()


@router.get("/chunks/{collection}/{document}")
- @limiter.limit(settings.rate_limit.by_key, key_func=lambda request: check_rate_limit(request=request))
async def get_chunks(
request: Request,
collection: UUID,
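For context, the shape these endpoint modules share after the change is an API-key check through FastAPI's `Security` dependency, with no rate-limit decorator. A minimal, self-contained sketch of that post-change pattern; the `check_api_key`, `User`, and bearer scheme below are simplified stand-ins, not the project's implementations:

```python
from uuid import UUID

from fastapi import APIRouter, Request, Security
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from pydantic import BaseModel

router = APIRouter()
bearer_scheme = HTTPBearer()


class User(BaseModel):
    id: str


def check_api_key(credentials: HTTPAuthorizationCredentials = Security(bearer_scheme)) -> User:
    # Hypothetical check: accept any non-empty bearer token as the user's id.
    return User(id=credentials.credentials)


@router.get("/chunks/{collection}/{document}")
async def get_chunks(request: Request, collection: UUID, document: UUID, user: User = Security(check_api_key)) -> dict:
    # No @limiter.limit decorator anymore: authentication only, no throttling.
    return {"collection": str(collection), "document": str(document), "user": user.id}
```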
8 changes: 2 additions & 6 deletions app/endpoints/collections.py
@@ -7,16 +7,14 @@

from app.schemas.collections import Collection, CollectionRequest, Collections
from app.schemas.security import User
- from app.utils.lifespan import clients, limiter
- from app.utils.security import check_api_key, check_rate_limit
- from app.utils.settings import settings
+ from app.utils.lifespan import clients
+ from app.utils.security import check_api_key
from app.utils.variables import INTERNET_COLLECTION_DISPLAY_ID, PUBLIC_COLLECTION_TYPE

router = APIRouter()


@router.post("/collections")
- @limiter.limit(settings.rate_limit.by_key, key_func=lambda request: check_rate_limit(request=request))
async def create_collection(request: Request, body: CollectionRequest, user: User = Security(check_api_key)) -> Response:
"""
Create a new collection.
@@ -35,7 +33,6 @@ async def create_collection(request: Request, body: CollectionRequest, user: Use


@router.get("/collections")
- @limiter.limit(settings.rate_limit.by_key, key_func=lambda request: check_rate_limit(request=request))
async def get_collections(request: Request, user: User = Security(check_api_key)) -> Union[Collection, Collections]:
"""
Get list of collections.
@@ -54,7 +51,6 @@ async def get_collections(request: Request, user: User = Security(check_api_key)


@router.delete("/collections/{collection}")
- @limiter.limit(settings.rate_limit.by_key, key_func=lambda request: check_rate_limit(request=request))
async def delete_collections(request: Request, collection: UUID, user: User = Security(check_api_key)) -> Response:
"""
Delete a collection.
7 changes: 2 additions & 5 deletions app/endpoints/documents.py
@@ -5,15 +5,13 @@

from app.schemas.documents import Documents
from app.schemas.security import User
- from app.utils.lifespan import clients, limiter
- from app.utils.security import check_api_key, check_rate_limit
- from app.utils.settings import settings
+ from app.utils.lifespan import clients
+ from app.utils.security import check_api_key

router = APIRouter()


@router.get("/documents/{collection}")
- @limiter.limit(settings.rate_limit.by_key, key_func=lambda request: check_rate_limit(request=request))
async def get_documents(
request: Request,
collection: UUID,
@@ -31,7 +29,6 @@ async def get_documents(


@router.delete("/documents/{collection}/{document}")
- @limiter.limit(settings.rate_limit.by_key, key_func=lambda request: check_rate_limit(request=request))
async def delete_document(request: Request, collection: UUID, document: UUID, user: User = Security(check_api_key)) -> Response:
"""
Delete a document and relative collections.
6 changes: 2 additions & 4 deletions app/endpoints/embeddings.py
@@ -3,17 +3,15 @@
from app.schemas.embeddings import Embeddings, EmbeddingsRequest
from app.schemas.security import User
from app.utils.exceptions import WrongModelTypeException
- from app.utils.lifespan import clients, limiter
+ from app.utils.lifespan import clients
from app.utils.route import forward_request
- from app.utils.security import check_api_key, check_rate_limit
- from app.utils.settings import settings
+ from app.utils.security import check_api_key
from app.utils.variables import DEFAULT_TIMEOUT, EMBEDDINGS_MODEL_TYPE

router = APIRouter()


@router.post(path="/embeddings")
- @limiter.limit(limit_value=settings.rate_limit.by_key, key_func=lambda request: check_rate_limit(request=request))
async def embeddings(request: Request, body: EmbeddingsRequest, user: User = Security(dependency=check_api_key)) -> Embeddings:
"""
Embedding API similar to OpenAI's API.
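The decorator removed here is the per-key rate limit on the embeddings endpoint. A minimal sketch of that pre-change pattern, assuming the shared `limiter` from `app.utils.lifespan` behaves like a slowapi `Limiter` and that `check_rate_limit` derives a throttling key from the request; the limit string and key function below are illustrative stand-ins for `settings.rate_limit.by_key` and the project's helper, not its actual code:

```python
from fastapi import FastAPI, Request
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

limiter = Limiter(key_func=get_remote_address)

app = FastAPI()
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)


def check_rate_limit(request: Request) -> str:
    # Hypothetical stand-in: throttle per Authorization header (i.e. per API key).
    return request.headers.get("Authorization", "anonymous")


@app.post("/embeddings")
@limiter.limit(limit_value="100/minute", key_func=lambda request: check_rate_limit(request=request))
async def embeddings(request: Request) -> dict:
    # slowapi requires the decorated endpoint to accept `request: Request`.
    return {"object": "list", "data": []}
```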
6 changes: 2 additions & 4 deletions app/endpoints/models.py
@@ -2,18 +2,16 @@

from fastapi import APIRouter, Request, Security

- from app.utils.settings import settings
from app.schemas.models import Model, Models
from app.schemas.security import User
- from app.utils.lifespan import clients, limiter
- from app.utils.security import check_api_key, check_rate_limit
+ from app.utils.lifespan import clients
+ from app.utils.security import check_api_key

router = APIRouter()


@router.get("/models/{model:path}")
@router.get("/models")
- @limiter.limit(settings.rate_limit.by_key, key_func=lambda request: check_rate_limit(request=request))
async def models(request: Request, model: Optional[str] = None, user: User = Security(check_api_key)) -> Union[Models, Model]:
"""
Model API similar to OpenAI's API.
6 changes: 2 additions & 4 deletions app/endpoints/rerank.py
@@ -3,9 +3,8 @@
from app.helpers import LanguageModelReranker
from app.schemas.rerank import RerankRequest, Reranks
from app.schemas.security import User
- from app.utils.lifespan import clients, limiter
- from app.utils.security import check_api_key, check_rate_limit
- from app.utils.settings import settings
+ from app.utils.lifespan import clients
+ from app.utils.security import check_api_key
from app.utils.variables import LANGUAGE_MODEL_TYPE, RERANK_MODEL_TYPE

from app.utils.exceptions import WrongModelTypeException
@@ -14,7 +13,6 @@


@router.post("/rerank")
- @limiter.limit(settings.rate_limit.by_key, key_func=lambda request: check_rate_limit(request=request))
async def rerank(request: Request, body: RerankRequest, user: User = Security(check_api_key)):
"""
Rerank a list of inputs with a language model or reranker model.
5 changes: 2 additions & 3 deletions app/endpoints/search.py
@@ -3,15 +3,14 @@
from app.helpers import InternetManager, SearchManager
from app.schemas.search import Searches, SearchRequest
from app.schemas.security import User
- from app.utils.lifespan import clients, limiter
- from app.utils.security import check_api_key, check_rate_limit
+ from app.utils.lifespan import clients
+ from app.utils.security import check_api_key
from app.utils.settings import settings

router = APIRouter()


@router.post(path="/search")
- @limiter.limit(limit_value=settings.rate_limit.by_key, key_func=lambda request: check_rate_limit(request=request))
async def search(request: Request, body: SearchRequest, user: User = Security(dependency=check_api_key)) -> Searches:
"""
Endpoint to search on the internet or with our search client.
1 change: 1 addition & 0 deletions app/helpers/_clientsmanager.py
@@ -16,6 +16,7 @@ def set(self):
self.models = ModelClients(settings=self.settings)

self.cache = CacheManager(connection_pool=ConnectionPool(**self.settings.clients.cache.args))
+ # @TODO: check if cache is reachable

if self.settings.clients.search.type == SEARCH_CLIENT_ELASTIC_TYPE:
self.search = ElasticSearchClient(models=self.models, **self.settings.clients.search.args)
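The new TODO asks for a reachability check on the cache. A small sketch of what such a check could look like, assuming the `ConnectionPool` above is redis-py's; this is an illustration, not the project's implementation:

```python
from redis import ConnectionPool, Redis
from redis.exceptions import RedisError


def cache_is_reachable(pool: ConnectionPool) -> bool:
    # PING the server through the existing pool; any redis-py error
    # (connection refused, timeout, auth failure) counts as unreachable.
    try:
        return bool(Redis(connection_pool=pool).ping())
    except RedisError:
        return False


# Example usage: cache_is_reachable(ConnectionPool(host="localhost", port=6379))
```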
6 changes: 3 additions & 3 deletions app/helpers/_fileuploader.py
@@ -9,15 +9,15 @@
from app.schemas.data import ParserOutput
from app.schemas.security import User
from app.utils.exceptions import InvalidJSONFormatException, NoChunksToUpsertException, ParsingFileFailedException, UnsupportedFileTypeException
- from app.utils.variables import CHUNKERS, DEFAULT_CHUNKER, HTML_TYPE, JSON_TYPE, PDF_TYPE, MARKDOWN_TYPE
+ from app.utils.variables import CHUNKERS, DEFAULT_CHUNKER, HTML_TYPE, JSON_TYPE, PDF_TYPE, MD_TYPE


class FileUploader:
TYPE_DICT = {
"json": JSON_TYPE,
"html": HTML_TYPE,
"pdf": PDF_TYPE,
- "md": MARKDOWN_TYPE,
+ "md": MD_TYPE,
}

def __init__(self, collection_id: str, search_client: SearchClient, user: User):
@@ -43,7 +43,7 @@ def parse(self, file: UploadFile) -> List[ParserOutput]:
elif file_type == HTML_TYPE:
parser = HTMLParser(collection_id=self.collection_id)

- elif file_type == MARKDOWN_TYPE:
+ elif file_type == MD_TYPE:
parser = MarkdownParser(collection_id=self.collection_id)

try:
1 change: 0 additions & 1 deletion app/main.py
@@ -29,7 +29,6 @@ async def root():


# Prometheus metrics
- # @TODO: env_var_name="ENABLE_METRICS"
app.instrumentator = Instrumentator().instrument(app=app)

# Middlewares
1 change: 1 addition & 0 deletions app/schemas/settings.py
@@ -114,6 +114,7 @@ class Config(ConfigBaseModel):
class Settings(BaseSettings):
# logging
log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
+ enable_metrics: bool = True

# config
config_file: str = "config.yml"
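The new `enable_metrics` field corresponds to the `ENABLE_METRICS` environment variable documented below. A trimmed sketch of how such a flag is typically read with pydantic settings; the `Settings` class here is a stand-in, and whether the app actually gates the Prometheus instrumentator on it is not shown in this diff:

```python
import os

from pydantic_settings import BaseSettings  # with pydantic v1: from pydantic import BaseSettings


class Settings(BaseSettings):
    log_level: str = "INFO"
    enable_metrics: bool = True  # matched case-insensitively against ENABLE_METRICS


os.environ["ENABLE_METRICS"] = "false"
settings = Settings()
print(settings.enable_metrics)  # False
```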
2 changes: 1 addition & 1 deletion app/utils/variables.py
@@ -21,7 +21,7 @@
JSON_TYPE = "application/json"
TXT_TYPE = "text/plain"
HTML_TYPE = "text/html"
- MARKDOWN_TYPE = "text/markdown"
+ MD_TYPE = "text/markdown"
# @TODO : add DOCX_TYPE (application/vnd.openxmlformats-officedocument.wordprocessingml.document)

# Clients
1 change: 1 addition & 0 deletions docs/deployment.md
@@ -23,6 +23,7 @@ The environment variables are those specific to FastAPI.
| APP_VERSION | Application version (default: "0.0.0") |
| APP_DESCRIPTION | Application description (default: None) |
| CONFIG_FILE | Path to the configuration file (default: "config.yml") |
+ | ENABLE_METRICS | Enables or disables Prometheus metrics (default: True) |
| LOG_LEVEL | Logging level (default: DEBUG) |

### Configuration file (config.yml)
