diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..016d2ab
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,22 @@
+ MIT License
+
+ Copyright (c) 2024 ultrasev
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5ccee41
--- /dev/null
+++ b/README.md
@@ -0,0 +1,50 @@
+# LLM API Reverse Proxy
+
+As is well known, providers such as Google, Groq, and Cerebras (which uses Amazon CloudFront) do not offer service in certain countries and regions (e.g. Hong Kong).
+
+This project provides a reverse proxy service to work around the lack of direct access from those locations.
+
+# Features
+
+- Supported providers: Groq, Google, OpenAI
+- Streaming responses
+- Compatible with the OpenAI API specification
+
+Note: vercel.app domains cannot be reached directly from mainland China. If you need direct access, see the author's earlier project [llmproxy](https://github.com/ultrasev/llmproxy), which deploys an LLM API reverse proxy on a Cloudflare Worker.
+
+# Example
+
+The proxy keeps the OpenAI wire format, so the official `openai` SDK works unchanged; point `base_url` at your deployment plus a provider prefix (`/openai`, `/groq`, or `/gemini`):
+
+```python
+from openai import AsyncOpenAI
+
+client = AsyncOpenAI(base_url="https://<your-app>.vercel.app/openai", api_key="sk-...")
+```
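+
+A fuller, self-contained sketch of a streaming request, modeled on `tests/test_async_api.py` (the deployment URL, API key, and model below are placeholders):
+
+```python
+import asyncio
+from openai import AsyncOpenAI
+
+
+async def main():
+    # The Bearer key is forwarded as-is to the upstream provider behind the chosen prefix.
+    client = AsyncOpenAI(base_url="https://<your-app>.vercel.app/groq",
+                         api_key="<YOUR_GROQ_API_KEY>")
+    stream = await client.chat.completions.create(
+        model="llama3-70b-8192",
+        messages=[{"role": "user", "content": "Count from 1 to 5"}],
+        stream=True,
+    )
+    async for chunk in stream:
+        delta = chunk.choices[0].delta.content
+        if delta:
+            print(delta, end="", flush=True)
+
+asyncio.run(main())
+```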
+
+# One-Click Deployment on Vercel
+
+[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https://github.com/ultrasev/vercel-python-fastapi/tree/master/llmproxy&demo-title=PythonDeployment&demo-description=Deploy&demo-url=https://llmproxy.vercel.app/&demo-image=https://vercel.com/button)
+
+# Local Development
+
+```bash
+pip3 install -r requirements.txt
+pip3 install uvicorn
+uvicorn main:app --host 0.0.0.0 --port 8000 --reload
+```
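+
+Once uvicorn is up, a quick sanity check is to point the SDK at the local server (a sketch; substitute a real provider key and a model that key can access):
+
+```python
+import asyncio
+from openai import AsyncOpenAI
+
+
+async def main():
+    # Same wire format as the hosted deployment, just served by the local uvicorn process.
+    client = AsyncOpenAI(base_url="http://localhost:8000/groq", api_key="<YOUR_GROQ_API_KEY>")
+    resp = await client.chat.completions.create(
+        model="llama3-70b-8192",
+        messages=[{"role": "user", "content": "Say hello in one word."}],
+        max_tokens=20,
+    )
+    print(resp.choices[0].message.content)
+
+asyncio.run(main())
+```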
+
+# License
+
+Copyright © 2024 [ultrasev](https://github.com/ultrasev).
+This project is [MIT](LICENSE) licensed.
diff --git a/api/__init__.py b/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/api/hello.py b/api/hello.py
new file mode 100644
index 0000000..d28fc33
--- /dev/null
+++ b/api/hello.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+from fastapi.routing import APIRouter
+router = APIRouter()
+
+
+@router.get("/")
+def read_root():
+ return {"Hello": "World"}
diff --git a/api/servers/__init__.py b/api/servers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/api/servers/base.py b/api/servers/base.py
new file mode 100644
index 0000000..6d85e9f
--- /dev/null
+++ b/api/servers/base.py
@@ -0,0 +1,31 @@
+from pydantic import BaseModel, Field
+import httpx
+import asyncio
+from typing import List, Dict, Optional
+
+
+class Message(BaseModel):
+ role: str
+ content: str
+
+
+class OpenAIProxyArgs(BaseModel):
+ model: str
+ messages: List[Message]
+ stream: bool = False
+ temperature: float = Field(default=0.7, ge=0, le=2)
+ top_p: float = Field(default=1, ge=0, le=1)
+ n: int = Field(default=1, ge=1)
+ max_tokens: Optional[int] = None
+ presence_penalty: float = Field(default=0, ge=-2, le=2)
+ frequency_penalty: float = Field(default=0, ge=-2, le=2)
+
+
+async def stream_openai_response(endpoint: str, payload: Dict, headers: Dict):
+    """Relay an upstream SSE stream unchanged, stopping after the [DONE] sentinel."""
+    # No read timeout: streamed completions can stay open far longer than httpx's default.
+    async with httpx.AsyncClient(timeout=None) as client:
+        async with client.stream("POST", endpoint, json=payload, headers=headers) as response:
+            async for line in response.aiter_lines():
+                if line.startswith("data: "):
+                    yield line + "\n\n"
+                    if line.strip() == "data: [DONE]":
+                        # Forward the sentinel to the client, then end the relay.
+                        break
diff --git a/api/servers/gemini.py b/api/servers/gemini.py
new file mode 100644
index 0000000..4689a99
--- /dev/null
+++ b/api/servers/gemini.py
@@ -0,0 +1,180 @@
+#!/usr/bin/env python
+''' Convert Gemini API to OpenAI API format
+
+Gemini API docs:
+- https://ai.google.dev/gemini-api/docs/text-generation?lang=rest
+'''
+from loguru import logger
+from pydantic import BaseModel
+from fastapi import APIRouter, HTTPException, Header, Query
+from fastapi.responses import JSONResponse, StreamingResponse
+import httpx
+import typing
+from typing import List, Dict, Optional
+from .base import Message
+import time
+import json
+import re
+
+router = APIRouter()
+
+
+GEMINI_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/{}:generateContent"
+GEMINI_STREAM_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/{}:streamGenerateContent"
+
+
+class OpenAIProxyArgs(BaseModel):
+ model: str
+ messages: List[Dict[str, str]]
+ stream: bool = False
+ temperature: float = 0.7
+ top_p: float = 1
+ n: int = 1
+ max_tokens: Optional[int] = None
+ presence_penalty: float = 0
+ frequency_penalty: float = 0
+
+
+class MessageConverter:
+ def __init__(self, messages: List[Dict[str, str]]):
+ self.messages = messages
+
+ def convert(self) -> List[Dict[str, str]]:
+ converted_messages = []
+ for message in self.messages:
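+            # Gemini only accepts "user" and "model" roles, so OpenAI "assistant" and "system" messages both map to "model".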
+ role = "user" if message["role"] == "user" else "model"
+ converted_messages.append({
+ "role": role,
+ "parts": [{"text": message["content"]}]
+ })
+ return converted_messages
+
+
+def convert_gemini_to_openai_response(gemini_response: dict, model: str) -> dict:
+ """Convert Gemini API response to OpenAI-compatible format."""
+ return {
+ "id": gemini_response.get("candidates", [{}])[0].get("content", {}).get("role", ""),
+ "object": "chat.completion",
+ "created": int(time.time()),
+ "model": model,
+ "usage": {
+ "prompt_tokens": 0, # Gemini doesn't provide token counts
+ "completion_tokens": 0,
+ "total_tokens": 0
+ },
+ "choices": [{
+ "message": {
+ "role": "assistant",
+ "content": gemini_response.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "")
+ },
+ "finish_reason": "stop",
+ "index": 0
+ }]
+ }
+
+
+async def stream_gemini_response(model: str, payload: dict, api_key: str):
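+    # streamGenerateContent returns a pretty-printed JSON array; instead of parsing it
+    # incrementally, each "text" field is pulled out with a regex and re-emitted as an
+    # OpenAI-style "chat.completion.chunk" SSE event.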
+ text_pattern = re.compile(r'"text": "(.*?)"')
+
+ async with httpx.AsyncClient() as client:
+ async with client.stream(
+ "POST",
+ GEMINI_STREAM_ENDPOINT.format(model),
+ json=payload,
+ headers={
+ "Content-Type": "application/json",
+ "x-goog-api-key": api_key
+ }
+ ) as response:
+ async for line in response.aiter_lines():
+ line = line.strip()
+ match = text_pattern.search(line)
+ if match:
+ text_content = match.group(1)
+ # Unescape any escaped characters
+ text_content = text_content.encode().decode('unicode_escape')
+
+ openai_format = {
+ "id": f"chatcmpl-{int(time.time())}",
+ "object": "chat.completion.chunk",
+ "created": int(time.time()),
+ "model": model,
+ "choices": [{
+ "index": 0,
+ "delta": {
+ "content": text_content
+ },
+ "finish_reason": None
+ }]
+ }
+
+ yield f"data: {json.dumps(openai_format)}\n\n"
+
+ # Send a final chunk to indicate completion
+ final_chunk = {
+ "id": f"chatcmpl-{int(time.time())}",
+ "object": "chat.completion.chunk",
+ "created": int(time.time()),
+ "model": model,
+ "choices": [{
+ "index": 0,
+ "delta": {},
+ "finish_reason": "stop"
+ }]
+ }
+ yield f"data: {json.dumps(final_chunk)}\n\n"
+ yield "data: [DONE]\n\n"
+
+
+@router.post("/chat/completions")
+async def proxy_chat_completions(
+ args: OpenAIProxyArgs,
+ authorization: str = Header(...),
+):
+    # Expect "Authorization: Bearer <key>"; reject malformed headers before calling upstream.
+    parts = authorization.split(" ")
+    if len(parts) != 2 or not parts[1]:
+        raise HTTPException(status_code=400, detail="API key not provided")
+    api_key = parts[1]
+    model = args.model
+
+ # Transform args into Gemini API format
+ gemini_payload = {
+ "contents": MessageConverter(args.messages).convert(),
+ "safetySettings": [
+ {
+ "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+ "threshold": "BLOCK_ONLY_HIGH"
+ }
+ ],
+ "generationConfig": {
+ "temperature": args.temperature,
+ "maxOutputTokens": args.max_tokens,
+ "topP": args.top_p,
+ "topK": 10
+ }
+ }
+
+ if args.stream:
+ return StreamingResponse(stream_gemini_response(model, gemini_payload, api_key), media_type="text/event-stream")
+ else:
+ async with httpx.AsyncClient() as client:
+ response = await client.post(
+ GEMINI_ENDPOINT.format(model),
+ json=gemini_payload,
+ headers={
+ "Content-Type": "application/json",
+ "x-goog-api-key": api_key
+ }
+ )
+ logger.info(response.status_code)
+
+ if response.status_code != 200:
+ return JSONResponse(content=response.json(), status_code=response.status_code)
+
+ response_json = response.json()
+
+ # Use the new conversion function
+ openai_compatible_response = convert_gemini_to_openai_response(
+ response_json, args.model)
+
+ return JSONResponse(openai_compatible_response)
diff --git a/api/servers/groq.py b/api/servers/groq.py
new file mode 100644
index 0000000..2283799
--- /dev/null
+++ b/api/servers/groq.py
@@ -0,0 +1,34 @@
+from fastapi import APIRouter, Header, HTTPException
+from fastapi.responses import JSONResponse, StreamingResponse
+from pydantic import BaseModel, Field
+import httpx
+import asyncio
+from typing import List, Dict, Optional
+from .base import stream_openai_response, OpenAIProxyArgs, Message
+
+router = APIRouter()
+GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
+
+
+@router.post("/chat/completions")
+async def proxy_chat_completions(args: OpenAIProxyArgs, authorization: str = Header(...)):
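+    # The caller's Bearer token is forwarded to Groq unchanged; the proxy itself stores no credentials.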
+ api_key = authorization.split(" ")[1]
+ headers = {
+ "Authorization": f"Bearer {api_key}",
+ "Content-Type": "application/json"
+ }
+ payload = args.dict(exclude_none=True)
+
+ if args.stream:
+ return StreamingResponse(stream_openai_response(GROQ_API_URL, payload, headers), media_type="text/event-stream")
+ else:
+ async with httpx.AsyncClient() as client:
+ try:
+ response = await client.post(GROQ_API_URL, json=payload, headers=headers)
+ response.raise_for_status()
+ return JSONResponse(response.json())
+ except httpx.HTTPStatusError as e:
+ raise HTTPException(
+ status_code=e.response.status_code, detail=str(e.response.text))
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
diff --git a/api/servers/openai.py b/api/servers/openai.py
new file mode 100644
index 0000000..d95b29d
--- /dev/null
+++ b/api/servers/openai.py
@@ -0,0 +1,34 @@
+from fastapi import APIRouter, Header, HTTPException
+from fastapi.responses import JSONResponse, StreamingResponse
+from pydantic import BaseModel, Field
+import httpx
+import asyncio
+from typing import List, Dict, Optional
+from .base import stream_openai_response, OpenAIProxyArgs, Message
+
+router = APIRouter()
+OPENAI_API_URL = "https://api.openai.com/v1/chat/completions"
+
+
+@router.post("/chat/completions")
+async def proxy_chat_completions(args: OpenAIProxyArgs, authorization: str = Header(...)):
+ api_key = authorization.split(" ")[1]
+ headers = {
+ "Authorization": f"Bearer {api_key}",
+ "Content-Type": "application/json"
+ }
+ payload = args.dict(exclude_none=True)
+
+ if args.stream:
+ return StreamingResponse(stream_openai_response(OPENAI_API_URL, payload, headers), media_type="text/event-stream")
+ else:
+ async with httpx.AsyncClient() as client:
+ try:
+ response = await client.post(OPENAI_API_URL, json=payload, headers=headers)
+ response.raise_for_status()
+ return JSONResponse(response.json())
+ except httpx.HTTPStatusError as e:
+ raise HTTPException(
+ status_code=e.response.status_code, detail=str(e.response.text))
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
diff --git a/api/v1/__init__.py b/api/v1/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..6b742c4
--- /dev/null
+++ b/main.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+from public.usage import USAGE as html
+from api.hello import router as hello_router
+
+from fastapi import FastAPI
+from fastapi.responses import Response
+from api.servers.groq import router as groq_router
+from api.servers.openai import router as openai_router
+from api.servers.gemini import router as gemini_router
+
+app = FastAPI()
+
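+# Each provider is mounted under its own prefix, so clients call e.g. POST /groq/chat/completions.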
+app.include_router(hello_router, prefix="/hello")
+app.include_router(groq_router, prefix="/groq")
+app.include_router(openai_router, prefix="/openai")
+app.include_router(gemini_router, prefix="/gemini")
+
+
+@app.get("/")
+def _root():
+ return Response(content=html, media_type="text/html")
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..de76041
--- /dev/null
+++ b/package.json
@@ -0,0 +1,5 @@
+{
+ "engines": {
+ "node": "18.x"
+ }
+}
\ No newline at end of file
diff --git a/public/__init__.py b/public/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/public/favicon.ico b/public/favicon.ico
new file mode 100644
index 0000000..03dc0c9
Binary files /dev/null and b/public/favicon.ico differ
diff --git a/public/usage.py b/public/usage.py
new file mode 100644
index 0000000..a913f15
--- /dev/null
+++ b/public/usage.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+USAGE = """
+
+
+
+
+ Usage
+
+
+
+
+
success
+
Usage
+
Visit Github doc for more information.
+
+
+
+
+"""
diff --git a/public/vercel.png b/public/vercel.png
new file mode 100644
index 0000000..fdb1f6f
Binary files /dev/null and b/public/vercel.png differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5a69ad0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+fastapi==0.88.0
+pydantic~=1.10.4
+python-multipart==0.0.5
+expiringdict==1.2.2
+rich==13.4.2
+openai==1.6.1
+httpx==0.27.0
+loguru==0.7.2
\ No newline at end of file
diff --git a/tests/test_async_api.py b/tests/test_async_api.py
new file mode 100644
index 0000000..22c6ab3
--- /dev/null
+++ b/tests/test_async_api.py
@@ -0,0 +1,83 @@
+import asyncio
+import pytest
+import os
+from dotenv import load_dotenv
+from openai import AsyncOpenAI
+import random
+from loguru import logger
+load_dotenv()
+
+
+def api_endpoint():
+ env = os.environ.get('ENV', 'development')
+ if env == 'production':
+ return "https://vercel-python-fastapi-chi.vercel.app/"
+ elif env == 'development':
+ return "http://192.168.31.46:3000"
+ else:
+ raise ValueError(f"Invalid environment: {env}")
+
+
+BASE_URL = api_endpoint()
+logger.info(f"BASE_URL: {BASE_URL}")
+
+
+async def make_request(supplier: str, api_key: str, model: str):
+ BASE_URL = api_endpoint() + f"/{supplier}"
+ query = "Count from 1 to 5"
+
+ client = AsyncOpenAI(base_url=BASE_URL, api_key=api_key)
+
+ try:
+ stream = await client.chat.completions.create(
+ model=model,
+ messages=[{"role": "user", "content": query}],
+ stream=True,
+ )
+
+ content = ""
+ async for chunk in stream:
+ delta_content = chunk.choices[0].delta.content
+ if delta_content:
+ content += delta_content
+ print(f"Received chunk: {delta_content}") # Debug print
+
+ print(f"Full content: {content}") # Debug print
+
+ if not content:
+ raise ValueError("Received empty content from API")
+
+ for i in range(1, 6):
+ assert str(
+ i) in content, f"Expected {i} in content, but it's missing. Content: {content}"
+
+ except Exception as e:
+ print(f"Error occurred: {str(e)}")
+ raise
+
+
+@pytest.mark.asyncio
+async def test_openai_streaming():
+ await make_request(
+ supplier="openai",
+ api_key=os.environ["OPENAI_API_KEY"],
+ model="gpt-3.5-turbo"
+ )
+
+
+@pytest.mark.asyncio
+async def test_groq_streaming():
+ await make_request(
+ supplier="groq",
+ api_key=os.environ["GROQ_API_KEY"],
+ model="llama3-70b-8192"
+ )
+
+
+@pytest.mark.asyncio
+async def test_gemini_streaming():
+ await make_request(
+ supplier="gemini",
+ api_key=os.environ["GEMINI_API_KEY"],
+ model="gemini-1.5-flash"
+ )
diff --git a/tests/test_sync_api.py b/tests/test_sync_api.py
new file mode 100644
index 0000000..be388c9
--- /dev/null
+++ b/tests/test_sync_api.py
@@ -0,0 +1,68 @@
+import asyncio
+import pytest
+import os
+from dotenv import load_dotenv
+from openai import AsyncOpenAI
+import random
+from loguru import logger
+load_dotenv()
+
+
+def api_endpoint():
+ env = os.environ.get('ENV', 'development')
+ if env == 'production':
+ return "https://vercel-python-fastapi-chi.vercel.app/"
+ elif env == 'development':
+ return "http://192.168.31.46:3000"
+ else:
+ raise ValueError(f"Invalid environment: {env}")
+
+
+BASE_URL = api_endpoint()
+logger.info(f"BASE_URL: {BASE_URL}")
+
+
+async def make_request(api_key: str,
+ model: str,
+ supplier: str,
+ query: str = "what is the result of 2*21"):
+ client = AsyncOpenAI(base_url=BASE_URL + f"/{supplier}", api_key=api_key)
+ response = await client.chat.completions.create(
+ model=model,
+ messages=[
+ {"role": "system", "content": "You are a helpful assistant。"},
+ {"role": "user", "content": query}
+ ],
+ temperature=0.7,
+ top_p=1,
+ max_tokens=20
+ )
+ print(type(response), response)
+ return response.choices[0].message.content
+
+
+@pytest.mark.asyncio
+async def test_groq():
+ await make_request(
+ supplier="groq",
+ api_key=os.environ["GROQ_API_KEY"],
+ model="llama3-70b-8192"
+ )
+
+
+@pytest.mark.asyncio
+async def test_openai():
+ await make_request(
+ supplier="openai",
+ api_key=os.environ["OPENAI_API_KEY"],
+ model="gpt-4o-mini"
+ )
+
+
+@pytest.mark.asyncio
+async def test_gemini():
+ await make_request(
+ supplier="gemini",
+ api_key=os.environ["GEMINI_API_KEY"],
+ model="gemini-1.5-flash"
+ )
diff --git a/vercel.json b/vercel.json
new file mode 100644
index 0000000..6e08ab5
--- /dev/null
+++ b/vercel.json
@@ -0,0 +1,14 @@
+{
+ "builds": [
+ {
+ "src": "main.py",
+ "use": "@vercel/python"
+ }
+ ],
+ "routes": [
+ {
+ "src": "/(.*)",
+ "dest": "main.py"
+ }
+ ]
+}
\ No newline at end of file