Commit 888f3d0: init

ultrasev committed Oct 16, 2024
Showing 20 changed files with 608 additions and 0 deletions.
22 changes: 22 additions & 0 deletions LICENSE
@@ -0,0 +1,22 @@
MIT License

Copyright (c) 2024 ultrasev

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

50 changes: 50 additions & 0 deletions README.md
@@ -0,0 +1,50 @@
<p align="center">
<img width="100px" src="public/vercel.png" align="center" alt="Deploy Python(+FastAPI) project on Vercel" />
<h2 align="center"> LLM API Reverse Proxy </h2>

<p align="center">
<a href="https://github.com/ultrasev/vercel-python-fastapi/issues">
<img alt="Issues" src="https://img.shields.io/github/issues/ultrasev/vercel-python-fastapi?style=flat&color=336791" />
</a>
<a href="https://github.com/ultrasev/vercel-python-fastapi/pulls">
<img alt="GitHub pull requests" src="https://img.shields.io/github/issues-pr/ultrasev/vercel-python-fastapi?style=flat&color=336791" />
</a>
<br />
<a href="https://github.com/ultrasev/vercel-python-fastapi/issues/new/choose">Report Bug</a>
<a href="https://github.com/ultrasev/vercel-python-fastapi/issues/new/choose">Request Feature</a>
</p>

As is well known, providers such as Google, Groq, and Cerebras (which fronts its API with Amazon CloudFront) do not offer service in some countries and regions (e.g., Hong Kong).

This project provides a reverse proxy service that works around the lack of direct access in those countries and regions.

# Features

- Supported providers: Groq, Google, OpenAI
- Streaming output
- Compatible with the OpenAI API specification

Note: the vercel.app domain is not directly reachable from mainland China. If you need direct access, see the author's earlier project [llmproxy](https://github.com/ultrasev/llmproxy), which deploys an LLM API reverse proxy on a Cloudflare Worker.

# Example

```python
from openai import AsyncOpenAI

# Point the client at your proxy deployment; the path prefix selects the
# upstream provider (/openai, /groq, or /gemini).
client = AsyncOpenAI(
    base_url="https://llmproxy.vercel.app/openai",
    api_key="<YOUR_UPSTREAM_API_KEY>",
)
```
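
Continuing with the client above, a streamed completion might look like this (a sketch; the model name is a placeholder for whatever your chosen upstream supports):

```python
import asyncio

async def main():
    stream = await client.chat.completions.create(
        model="gpt-4o-mini",  # placeholder model name
        messages=[{"role": "user", "content": "Hello!"}],
        stream=True,
    )
    async for chunk in stream:
        # Chunks follow the OpenAI streaming format: incremental deltas.
        print(chunk.choices[0].delta.content or "", end="", flush=True)

asyncio.run(main())
```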

# One-Click Deploy on Vercel

[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https://github.com/ultrasev/vercel-python-fastapi/tree/master/llmproxy&demo-title=PythonDeployment&demo-description=Deploy&demo-url=https://llmproxy.vercel.app/&demo-image=https://vercel.com/button)

# Local Development

```bash
pip3 install -r requirements.txt
pip3 install uvicorn
uvicorn main:app --host 0.0.0.0 --port 8000 --reload
```
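
Once the server is running, a quick smoke test against the local proxy might look like this (a sketch using httpx, which the project already depends on; the key and model are placeholders):

```python
import httpx

resp = httpx.post(
    "http://localhost:8000/openai/chat/completions",
    headers={"Authorization": "Bearer <YOUR_UPSTREAM_API_KEY>"},
    json={
        "model": "gpt-4o-mini",  # placeholder model name
        "messages": [{"role": "user", "content": "ping"}],
    },
)
print(resp.status_code, resp.json())
```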

# License

Copyright © 2024 [ultrasev](https://github.com/ultrasev).<br />
This project is [MIT](LICENSE) licensed.
Empty file added api/__init__.py
Empty file.
8 changes: 8 additions & 0 deletions api/hello.py
@@ -0,0 +1,8 @@
#!/usr/bin/env python
from fastapi.routing import APIRouter
router = APIRouter()


@router.get("/")
def read_root():
    return {"Hello": "World"}
Empty file added api/servers/__init__.py
Empty file.
31 changes: 31 additions & 0 deletions api/servers/base.py
@@ -0,0 +1,31 @@
from pydantic import BaseModel, Field
import httpx
from typing import List, Dict, Optional


class Message(BaseModel):
    role: str
    content: str


class OpenAIProxyArgs(BaseModel):
    model: str
    messages: List[Message]
    stream: bool = False
    temperature: float = Field(default=0.7, ge=0, le=2)
    top_p: float = Field(default=1, ge=0, le=1)
    n: int = Field(default=1, ge=1)
    max_tokens: Optional[int] = None
    presence_penalty: float = Field(default=0, ge=-2, le=2)
    frequency_penalty: float = Field(default=0, ge=-2, le=2)


async def stream_openai_response(endpoint: str, payload: Dict, headers: Dict):
    async with httpx.AsyncClient() as client:
        async with client.stream("POST", endpoint, json=payload, headers=headers) as response:
            async for line in response.aiter_lines():
                # "data: [DONE]" also starts with "data: ", so test for the
                # terminator first, forward it, then stop reading.
                if line.strip() == "data: [DONE]":
                    yield "data: [DONE]\n\n"
                    break
                elif line.startswith("data: "):
                    yield line + "\n\n"
180 changes: 180 additions & 0 deletions api/servers/gemini.py
@@ -0,0 +1,180 @@
#!/usr/bin/env python
''' Convert Gemini API to OpenAI API format
Gemini API docs:
- https://ai.google.dev/gemini-api/docs/text-generation?lang=rest
'''
from loguru import logger
from pydantic import BaseModel
from fastapi import APIRouter, HTTPException, Header
from fastapi.responses import JSONResponse, StreamingResponse
import httpx
from typing import List, Dict, Optional
import time
import json
import re

router = APIRouter()


GEMINI_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/{}:generateContent"
GEMINI_STREAM_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/{}:streamGenerateContent"


class OpenAIProxyArgs(BaseModel):
    model: str
    messages: List[Dict[str, str]]
    stream: bool = False
    temperature: float = 0.7
    top_p: float = 1
    n: int = 1
    max_tokens: Optional[int] = None
    presence_penalty: float = 0
    frequency_penalty: float = 0


class MessageConverter:
    """Convert OpenAI-style messages into Gemini "contents" entries."""

    def __init__(self, messages: List[Dict[str, str]]):
        self.messages = messages

    def convert(self) -> List[Dict[str, str]]:
        converted_messages = []
        for message in self.messages:
            # Gemini only distinguishes "user" and "model" roles.
            role = "user" if message["role"] == "user" else "model"
            converted_messages.append({
                "role": role,
                "parts": [{"text": message["content"]}]
            })
        return converted_messages


def convert_gemini_to_openai_response(gemini_response: dict, model: str) -> dict:
    """Convert Gemini API response to OpenAI-compatible format."""
    candidate = gemini_response.get("candidates", [{}])[0]
    return {
        "id": f"chatcmpl-{int(time.time())}",  # Gemini has no response id; synthesize one
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "usage": {
            "prompt_tokens": 0,  # Gemini doesn't provide token counts
            "completion_tokens": 0,
            "total_tokens": 0
        },
        "choices": [{
            "message": {
                "role": "assistant",
                "content": candidate.get("content", {}).get("parts", [{}])[0].get("text", "")
            },
            "finish_reason": "stop",
            "index": 0
        }]
    }


async def stream_gemini_response(model: str, payload: dict, api_key: str):
    # Gemini streams partial JSON; extract "text" fields line by line with a
    # regex rather than parsing incomplete JSON documents.
    text_pattern = re.compile(r'"text": "(.*?)"')

    async with httpx.AsyncClient() as client:
        async with client.stream(
            "POST",
            GEMINI_STREAM_ENDPOINT.format(model),
            json=payload,
            headers={
                "Content-Type": "application/json",
                "x-goog-api-key": api_key
            }
        ) as response:
            async for line in response.aiter_lines():
                line = line.strip()
                match = text_pattern.search(line)
                if match:
                    text_content = match.group(1)
                    # Unescape any escaped characters
                    text_content = text_content.encode().decode('unicode_escape')

                    openai_format = {
                        "id": f"chatcmpl-{int(time.time())}",
                        "object": "chat.completion.chunk",
                        "created": int(time.time()),
                        "model": model,
                        "choices": [{
                            "index": 0,
                            "delta": {
                                "content": text_content
                            },
                            "finish_reason": None
                        }]
                    }

                    yield f"data: {json.dumps(openai_format)}\n\n"

    # Send a final chunk to indicate completion
    final_chunk = {
        "id": f"chatcmpl-{int(time.time())}",
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [{
            "index": 0,
            "delta": {},
            "finish_reason": "stop"
        }]
    }
    yield f"data: {json.dumps(final_chunk)}\n\n"
    yield "data: [DONE]\n\n"


@router.post("/chat/completions")
async def proxy_chat_completions(
    args: OpenAIProxyArgs,
    authorization: str = Header(...),
):
    # Expect "Authorization: Bearer <key>"; guard against a malformed header.
    parts = authorization.split(" ")
    api_key = parts[1] if len(parts) > 1 else ""
    model = args.model

    if not api_key:
        raise HTTPException(status_code=400, detail="API key not provided")

    # Transform args into Gemini API format
    gemini_payload = {
        "contents": MessageConverter(args.messages).convert(),
        "safetySettings": [
            {
                "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                "threshold": "BLOCK_ONLY_HIGH"
            }
        ],
        "generationConfig": {
            "temperature": args.temperature,
            "maxOutputTokens": args.max_tokens,
            "topP": args.top_p,
            "topK": 10
        }
    }

    if args.stream:
        return StreamingResponse(stream_gemini_response(model, gemini_payload, api_key), media_type="text/event-stream")
    else:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                GEMINI_ENDPOINT.format(model),
                json=gemini_payload,
                headers={
                    "Content-Type": "application/json",
                    "x-goog-api-key": api_key
                }
            )
            logger.info(response.status_code)

            if response.status_code != 200:
                return JSONResponse(content=response.json(), status_code=response.status_code)

            response_json = response.json()

            # Convert the Gemini response to the OpenAI-compatible format
            openai_compatible_response = convert_gemini_to_openai_response(
                response_json, args.model)

            return JSONResponse(openai_compatible_response)
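
For illustration, an OpenAI-format request sent to the /gemini route above gets translated into a Gemini generateContent call and converted back (a sketch against a local instance; the model name and key are placeholders):

```python
import httpx

resp = httpx.post(
    "http://localhost:8000/gemini/chat/completions",
    headers={"Authorization": "Bearer <YOUR_GEMINI_API_KEY>"},
    json={
        "model": "gemini-1.5-flash",  # placeholder Gemini model name
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
# The proxy returns an OpenAI-shaped "chat.completion" object.
print(resp.json()["choices"][0]["message"]["content"])
```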
34 changes: 34 additions & 0 deletions api/servers/groq.py
@@ -0,0 +1,34 @@
from fastapi import APIRouter, Header, HTTPException
from fastapi.responses import JSONResponse, StreamingResponse
import httpx
from .base import stream_openai_response, OpenAIProxyArgs

router = APIRouter()
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"


@router.post("/chat/completions")
async def proxy_chat_completions(args: OpenAIProxyArgs, authorization: str = Header(...)):
    # Expect "Authorization: Bearer <key>"; reject malformed headers instead
    # of failing with an unhandled IndexError.
    parts = authorization.split(" ")
    if len(parts) != 2:
        raise HTTPException(status_code=400, detail="API key not provided")
    api_key = parts[1]
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    payload = args.dict(exclude_none=True)

    if args.stream:
        return StreamingResponse(stream_openai_response(GROQ_API_URL, payload, headers), media_type="text/event-stream")
    else:
        async with httpx.AsyncClient() as client:
            try:
                response = await client.post(GROQ_API_URL, json=payload, headers=headers)
                response.raise_for_status()
                return JSONResponse(response.json())
            except httpx.HTTPStatusError as e:
                raise HTTPException(
                    status_code=e.response.status_code, detail=str(e.response.text))
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))
34 changes: 34 additions & 0 deletions api/servers/openai.py
@@ -0,0 +1,34 @@
from fastapi import APIRouter, Header, HTTPException
from fastapi.responses import JSONResponse, StreamingResponse
import httpx
from .base import stream_openai_response, OpenAIProxyArgs

router = APIRouter()
OPENAI_API_URL = "https://api.openai.com/v1/chat/completions"


@router.post("/chat/completions")
async def proxy_chat_completions(args: OpenAIProxyArgs, authorization: str = Header(...)):
    # Expect "Authorization: Bearer <key>"; reject malformed headers instead
    # of failing with an unhandled IndexError.
    parts = authorization.split(" ")
    if len(parts) != 2:
        raise HTTPException(status_code=400, detail="API key not provided")
    api_key = parts[1]
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    payload = args.dict(exclude_none=True)

    if args.stream:
        return StreamingResponse(stream_openai_response(OPENAI_API_URL, payload, headers), media_type="text/event-stream")
    else:
        async with httpx.AsyncClient() as client:
            try:
                response = await client.post(OPENAI_API_URL, json=payload, headers=headers)
                response.raise_for_status()
                return JSONResponse(response.json())
            except httpx.HTTPStatusError as e:
                raise HTTPException(
                    status_code=e.response.status_code, detail=str(e.response.text))
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))
Empty file added api/v1/__init__.py
Empty file.
21 changes: 21 additions & 0 deletions main.py
@@ -0,0 +1,21 @@
#!/usr/bin/env python3
from public.usage import USAGE as html
from api.hello import router as hello_router

from fastapi import FastAPI
from fastapi.responses import Response
from api.servers.groq import router as groq_router
from api.servers.openai import router as openai_router
from api.servers.gemini import router as gemini_router

app = FastAPI()

app.include_router(hello_router, prefix="/hello")
app.include_router(groq_router, prefix="/groq")
app.include_router(openai_router, prefix="/openai")
app.include_router(gemini_router, prefix="/gemini")


@app.get("/")
def _root():
    return Response(content=html, media_type="text/html")
5 changes: 5 additions & 0 deletions package.json
@@ -0,0 +1,5 @@
{
  "engines": {
    "node": "18.x"
  }
}
Empty file added public/__init__.py
Empty file.
Binary file added public/favicon.ico
Binary file not shown.
