diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..016d2ab --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ + MIT License + + Copyright (c) [2024] [ultrasev] + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..5ccee41 --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +

# Deploy Python(+FastAPI) project on Vercel

LLM API reverse proxy

Report Bug · Request Feature

As is well known, providers such as Google, Groq, and Cerebras (which uses Amazon CloudFront) do not offer service in some countries and regions (e.g., Hong Kong).

This project provides a reverse-proxy service to work around the lack of direct access in those countries and regions.

# Supported Features

- Supported providers: Groq, Google, OpenAI
- Streaming output
- Compatible with the OpenAI API specification

Note: the vercel.app domain is not directly reachable from mainland China. If you need direct access there, see the author's earlier project [llmproxy](https://github.com/ultrasev/llmproxy), which deploys an LLM API reverse proxy on a Cloudflare Worker.

# Example

A minimal sketch using the official OpenAI client: point `base_url` at your own deployment plus the provider prefix (the URL, key, and model below are placeholders):

```python
from openai import AsyncOpenAI

client = AsyncOpenAI(base_url="https://llmproxy.vercel.app/openai", api_key="sk-...")
# inside an async function:
response = await client.chat.completions.create(model="gpt-4o-mini", messages=[{"role": "user", "content": "what is the result of 2*21"}])
print(response.choices[0].message.content)
```

# One-Click Deploy with Vercel

[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https://github.com/ultrasev/vercel-python-fastapi/tree/master/llmproxy&demo-title=PythonDeployment&demo-description=Deploy&demo-url=https://llmproxy.vercel.app/&demo-image=https://vercel.com/button)

# Local Development

```bash
pip3 install -r requirements.txt
pip3 install uvicorn
uvicorn main:app --host 0.0.0.0 --port 8000 --reload
```

# License

Copyright © 2024 [ultrasev](https://github.com/ultrasev).
+This project is [MIT](LICENSE) licensed. diff --git a/api/__init__.py b/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/hello.py b/api/hello.py new file mode 100644 index 0000000..d28fc33 --- /dev/null +++ b/api/hello.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python +from fastapi.routing import APIRouter +router = APIRouter() + + +@router.get("/") +def read_root(): + return {"Hello": "World"} diff --git a/api/servers/__init__.py b/api/servers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/servers/base.py b/api/servers/base.py new file mode 100644 index 0000000..6d85e9f --- /dev/null +++ b/api/servers/base.py @@ -0,0 +1,31 @@ +from pydantic import BaseModel, Field +import httpx +import asyncio +from typing import List, Dict, Optional + + +class Message(BaseModel): + role: str + content: str + + +class OpenAIProxyArgs(BaseModel): + model: str + messages: List[Message] + stream: bool = False + temperature: float = Field(default=0.7, ge=0, le=2) + top_p: float = Field(default=1, ge=0, le=1) + n: int = Field(default=1, ge=1) + max_tokens: Optional[int] = None + presence_penalty: float = Field(default=0, ge=-2, le=2) + frequency_penalty: float = Field(default=0, ge=-2, le=2) + + +async def stream_openai_response(endpoint: str, payload: Dict, headers: Dict): + async with httpx.AsyncClient() as client: + async with client.stream("POST", endpoint, json=payload, headers=headers) as response: + async for line in response.aiter_lines(): + if line.startswith("data: "): + yield line + "\n\n" + elif line.strip() == "data: [DONE]": + break diff --git a/api/servers/gemini.py b/api/servers/gemini.py new file mode 100644 index 0000000..4689a99 --- /dev/null +++ b/api/servers/gemini.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python +''' Convert Gemini API to OpenAI API format + +Gemini API docs: +- https://ai.google.dev/gemini-api/docs/text-generation?lang=rest +''' +from loguru import logger +from pydantic import BaseModel +from fastapi import APIRouter, HTTPException, Header, Query +from fastapi.responses import JSONResponse, StreamingResponse +import httpx +import typing +from typing import List, Dict, Optional +from .base import Message +import time +import json +import re + +router = APIRouter() + + +GEMINI_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/{}:generateContent" +GEMINI_STREAM_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/{}:streamGenerateContent" + + +class OpenAIProxyArgs(BaseModel): + model: str + messages: List[Dict[str, str]] + stream: bool = False + temperature: float = 0.7 + top_p: float = 1 + n: int = 1 + max_tokens: Optional[int] = None + presence_penalty: float = 0 + frequency_penalty: float = 0 + + +class MessageConverter: + def __init__(self, messages: List[Dict[str, str]]): + self.messages = messages + + def convert(self) -> List[Dict[str, str]]: + converted_messages = [] + for message in self.messages: + role = "user" if message["role"] == "user" else "model" + converted_messages.append({ + "role": role, + "parts": [{"text": message["content"]}] + }) + return converted_messages + + +def convert_gemini_to_openai_response(gemini_response: dict, model: str) -> dict: + """Convert Gemini API response to OpenAI-compatible format.""" + return { + "id": gemini_response.get("candidates", [{}])[0].get("content", {}).get("role", ""), + "object": "chat.completion", + "created": int(time.time()), + "model": model, + "usage": { + "prompt_tokens": 0, # Gemini doesn't provide token counts + "completion_tokens": 
0, + "total_tokens": 0 + }, + "choices": [{ + "message": { + "role": "assistant", + "content": gemini_response.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "") + }, + "finish_reason": "stop", + "index": 0 + }] + } + + +async def stream_gemini_response(model: str, payload: dict, api_key: str): + text_pattern = re.compile(r'"text": "(.*?)"') + + async with httpx.AsyncClient() as client: + async with client.stream( + "POST", + GEMINI_STREAM_ENDPOINT.format(model), + json=payload, + headers={ + "Content-Type": "application/json", + "x-goog-api-key": api_key + } + ) as response: + async for line in response.aiter_lines(): + line = line.strip() + match = text_pattern.search(line) + if match: + text_content = match.group(1) + # Unescape any escaped characters + text_content = text_content.encode().decode('unicode_escape') + + openai_format = { + "id": f"chatcmpl-{int(time.time())}", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": model, + "choices": [{ + "index": 0, + "delta": { + "content": text_content + }, + "finish_reason": None + }] + } + + yield f"data: {json.dumps(openai_format)}\n\n" + + # Send a final chunk to indicate completion + final_chunk = { + "id": f"chatcmpl-{int(time.time())}", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": model, + "choices": [{ + "index": 0, + "delta": {}, + "finish_reason": "stop" + }] + } + yield f"data: {json.dumps(final_chunk)}\n\n" + yield "data: [DONE]\n\n" + + +@router.post("/chat/completions") +async def proxy_chat_completions( + args: OpenAIProxyArgs, + authorization: str = Header(...), +): + api_key = authorization.split(" ")[1] + model = args.model + + if not api_key: + raise HTTPException(status_code=400, detail="API key not provided") + + # Transform args into Gemini API format + gemini_payload = { + "contents": MessageConverter(args.messages).convert(), + "safetySettings": [ + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_ONLY_HIGH" + } + ], + "generationConfig": { + "temperature": args.temperature, + "maxOutputTokens": args.max_tokens, + "topP": args.top_p, + "topK": 10 + } + } + + if args.stream: + return StreamingResponse(stream_gemini_response(model, gemini_payload, api_key), media_type="text/event-stream") + else: + async with httpx.AsyncClient() as client: + response = await client.post( + GEMINI_ENDPOINT.format(model), + json=gemini_payload, + headers={ + "Content-Type": "application/json", + "x-goog-api-key": api_key + } + ) + logger.info(response.status_code) + + if response.status_code != 200: + return JSONResponse(content=response.json(), status_code=response.status_code) + + response_json = response.json() + + # Use the new conversion function + openai_compatible_response = convert_gemini_to_openai_response( + response_json, args.model) + + return JSONResponse(openai_compatible_response) diff --git a/api/servers/groq.py b/api/servers/groq.py new file mode 100644 index 0000000..2283799 --- /dev/null +++ b/api/servers/groq.py @@ -0,0 +1,34 @@ +from fastapi import APIRouter, Header, HTTPException +from fastapi.responses import JSONResponse, StreamingResponse +from pydantic import BaseModel, Field +import httpx +import asyncio +from typing import List, Dict, Optional +from .base import stream_openai_response, OpenAIProxyArgs, Message + +router = APIRouter() +GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions" + + +@router.post("/chat/completions") +async def proxy_chat_completions(args: OpenAIProxyArgs, 
authorization: str = Header(...)): + api_key = authorization.split(" ")[1] + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json" + } + payload = args.dict(exclude_none=True) + + if args.stream: + return StreamingResponse(stream_openai_response(GROQ_API_URL, payload, headers), media_type="text/event-stream") + else: + async with httpx.AsyncClient() as client: + try: + response = await client.post(GROQ_API_URL, json=payload, headers=headers) + response.raise_for_status() + return JSONResponse(response.json()) + except httpx.HTTPStatusError as e: + raise HTTPException( + status_code=e.response.status_code, detail=str(e.response.text)) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) diff --git a/api/servers/openai.py b/api/servers/openai.py new file mode 100644 index 0000000..d95b29d --- /dev/null +++ b/api/servers/openai.py @@ -0,0 +1,34 @@ +from fastapi import APIRouter, Header, HTTPException +from fastapi.responses import JSONResponse, StreamingResponse +from pydantic import BaseModel, Field +import httpx +import asyncio +from typing import List, Dict, Optional +from .base import stream_openai_response, OpenAIProxyArgs, Message + +router = APIRouter() +OPENAI_API_URL = "https://api.openai.com/v1/chat/completions" + + +@router.post("/chat/completions") +async def proxy_chat_completions(args: OpenAIProxyArgs, authorization: str = Header(...)): + api_key = authorization.split(" ")[1] + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json" + } + payload = args.dict(exclude_none=True) + + if args.stream: + return StreamingResponse(stream_openai_response(OPENAI_API_URL, payload, headers), media_type="text/event-stream") + else: + async with httpx.AsyncClient() as client: + try: + response = await client.post(OPENAI_API_URL, json=payload, headers=headers) + response.raise_for_status() + return JSONResponse(response.json()) + except httpx.HTTPStatusError as e: + raise HTTPException( + status_code=e.response.status_code, detail=str(e.response.text)) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) diff --git a/api/v1/__init__.py b/api/v1/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/main.py b/main.py new file mode 100644 index 0000000..6b742c4 --- /dev/null +++ b/main.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +from public.usage import USAGE as html +from api.hello import router as hello_router + +from fastapi import FastAPI +from fastapi.responses import Response +from api.servers.groq import router as groq_router +from api.servers.openai import router as openai_router +from api.servers.gemini import router as gemini_router + +app = FastAPI() + +app.include_router(hello_router, prefix="/hello") +app.include_router(groq_router, prefix="/groq") +app.include_router(openai_router, prefix="/openai") +app.include_router(gemini_router, prefix="/gemini") + + +@app.get("/") +def _root(): + return Response(content=html, media_type="text/html") diff --git a/package.json b/package.json new file mode 100644 index 0000000..de76041 --- /dev/null +++ b/package.json @@ -0,0 +1,5 @@ +{ + "engines": { + "node": "18.x" + } +} \ No newline at end of file diff --git a/public/__init__.py b/public/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/public/favicon.ico b/public/favicon.ico new file mode 100644 index 0000000..03dc0c9 Binary files /dev/null and b/public/favicon.ico differ diff --git a/public/usage.py b/public/usage.py new file mode 100644 
index 0000000..a913f15 --- /dev/null +++ b/public/usage.py @@ -0,0 +1,50 @@
#!/usr/bin/env python

# Minimal HTML landing page served by the root endpoint in main.py.
USAGE = """
<!DOCTYPE html>
<html>
<head>
  <title>Usage</title>
</head>
<body>
  <h1>success</h1>
  <h2>Usage</h2>
  <p>Visit the <a href="https://github.com/ultrasev/vercel-python-fastapi">Github doc</a> for more information.</p>
</body>
</html>
+ + + +""" diff --git a/public/vercel.png b/public/vercel.png new file mode 100644 index 0000000..fdb1f6f Binary files /dev/null and b/public/vercel.png differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5a69ad0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +fastapi==0.88.0 +pydantic~=1.10.4 +python-multipart==0.0.5 +expiringdict==1.2.2 +rich==13.4.2 +openai==1.6.1 +httpx==0.27.0 +loguru==0.7.2 \ No newline at end of file diff --git a/tests/test_async_api.py b/tests/test_async_api.py new file mode 100644 index 0000000..22c6ab3 --- /dev/null +++ b/tests/test_async_api.py @@ -0,0 +1,83 @@ +import asyncio +import pytest +import os +from dotenv import load_dotenv +from openai import AsyncOpenAI +import random +from loguru import logger +load_dotenv() + + +def api_endpoint(): + env = os.environ.get('ENV', 'development') + if env == 'production': + return "https://vercel-python-fastapi-chi.vercel.app/" + elif env == 'development': + return "http://192.168.31.46:3000" + else: + raise ValueError(f"Invalid environment: {env}") + + +BASE_URL = api_endpoint() +logger.info(f"BASE_URL: {BASE_URL}") + + +async def make_request(supplier: str, api_key: str, model: str): + BASE_URL = api_endpoint() + f"/{supplier}" + query = "Count from 1 to 5" + + client = AsyncOpenAI(base_url=BASE_URL, api_key=api_key) + + try: + stream = await client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": query}], + stream=True, + ) + + content = "" + async for chunk in stream: + delta_content = chunk.choices[0].delta.content + if delta_content: + content += delta_content + print(f"Received chunk: {delta_content}") # Debug print + + print(f"Full content: {content}") # Debug print + + if not content: + raise ValueError("Received empty content from API") + + for i in range(1, 6): + assert str( + i) in content, f"Expected {i} in content, but it's missing. 
Content: {content}" + + except Exception as e: + print(f"Error occurred: {str(e)}") + raise + + +@pytest.mark.asyncio +async def test_openai_streaming(): + await make_request( + supplier="openai", + api_key=os.environ["OPENAI_API_KEY"], + model="gpt-3.5-turbo" + ) + + +@pytest.mark.asyncio +async def test_groq_streaming(): + await make_request( + supplier="groq", + api_key=os.environ["GROQ_API_KEY"], + model="llama3-70b-8192" + ) + + +@pytest.mark.asyncio +async def test_gemini_streaming(): + await make_request( + supplier="gemini", + api_key=os.environ["GEMINI_API_KEY"], + model="gemini-1.5-flash" + ) diff --git a/tests/test_sync_api.py b/tests/test_sync_api.py new file mode 100644 index 0000000..be388c9 --- /dev/null +++ b/tests/test_sync_api.py @@ -0,0 +1,68 @@ +import asyncio +import pytest +import os +from dotenv import load_dotenv +from openai import AsyncOpenAI +import random +from loguru import logger +load_dotenv() + + +def api_endpoint(): + env = os.environ.get('ENV', 'development') + if env == 'production': + return "https://vercel-python-fastapi-chi.vercel.app/" + elif env == 'development': + return "http://192.168.31.46:3000" + else: + raise ValueError(f"Invalid environment: {env}") + + +BASE_URL = api_endpoint() +logger.info(f"BASE_URL: {BASE_URL}") + + +async def make_request(api_key: str, + model: str, + supplier: str, + query: str = "what is the result of 2*21"): + client = AsyncOpenAI(base_url=BASE_URL + f"/{supplier}", api_key=api_key) + response = await client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": "You are a helpful assistant。"}, + {"role": "user", "content": query} + ], + temperature=0.7, + top_p=1, + max_tokens=20 + ) + print(type(response), response) + return response.choices[0].message.content + + +@pytest.mark.asyncio +async def test_groq(): + await make_request( + supplier="groq", + api_key=os.environ["GROQ_API_KEY"], + model="llama3-70b-8192" + ) + + +@pytest.mark.asyncio +async def test_openai(): + await make_request( + supplier="openai", + api_key=os.environ["OPENAI_API_KEY"], + model="gpt-4o-mini" + ) + + +@pytest.mark.asyncio +async def test_gemini(): + await make_request( + supplier="gemini", + api_key=os.environ["GEMINI_API_KEY"], + model="gemini-1.5-flash" + ) diff --git a/vercel.json b/vercel.json new file mode 100644 index 0000000..6e08ab5 --- /dev/null +++ b/vercel.json @@ -0,0 +1,14 @@ +{ + "builds": [ + { + "src": "main.py", + "use": "@vercel/python" + } + ], + "routes": [ + { + "src": "/(.*)", + "dest": "main.py" + } + ] +} \ No newline at end of file
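For reference, the catch-all route in `vercel.json` sends every request to `main.py`, where FastAPI dispatches it by router prefix (`/hello`, `/groq`, `/openai`, `/gemini`). The sketch below shows what a raw call to a deployed instance looks like; the deployment URL and API key are placeholders, and the model name is the one used in the tests.

```python
import httpx

BASE_URL = "https://llmproxy.vercel.app"  # placeholder: your own Vercel deployment

# vercel.json routes every path to main.py; FastAPI then matches the /groq prefix
# and forwards the request to Groq's OpenAI-compatible endpoint.
resp = httpx.post(
    f"{BASE_URL}/groq/chat/completions",
    headers={"Authorization": "Bearer gsk_..."},  # placeholder Groq API key
    json={"model": "llama3-70b-8192",
          "messages": [{"role": "user", "content": "Count from 1 to 5"}]},
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])
```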