diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..016d2ab --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ + MIT License + + Copyright (c) [2024] [ultrasev] + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..5ccee41 --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +

# Deploy Python(+FastAPI) project on Vercel

LLM API reverse proxy

Report Bug · Request Feature

As is well known, providers such as Google, Groq, and Cerebras (which uses Amazon CloudFront) do not offer service in some countries and regions (e.g., Hong Kong).

This project provides a reverse-proxy service to work around the lack of direct access in those countries and regions.

# Supported Features

- Supported providers: Groq, Google, OpenAI
- Streaming output
- Compatible with the OpenAI API specification

Note: the vercel.app domain is not directly reachable from mainland China. If you need direct access there, see the author's earlier project [llmproxy](https://github.com/ultrasev/llmproxy), which deploys an LLM API reverse proxy on a Cloudflare Worker.

# Example

A minimal sketch using the official OpenAI client: point `base_url` at your own deployment plus the provider prefix (the URL, key, and model below are placeholders):

```python
from openai import AsyncOpenAI

client = AsyncOpenAI(base_url="https://llmproxy.vercel.app/openai", api_key="sk-...")
# inside an async function:
response = await client.chat.completions.create(model="gpt-4o-mini", messages=[{"role": "user", "content": "what is the result of 2*21"}])
print(response.choices[0].message.content)
```

# One-Click Deploy with Vercel

[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https://github.com/ultrasev/vercel-python-fastapi/tree/master/llmproxy&demo-title=PythonDeployment&demo-description=Deploy&demo-url=https://llmproxy.vercel.app/&demo-image=https://vercel.com/button)

# Local Development

```bash
pip3 install -r requirements.txt
pip3 install uvicorn
uvicorn main:app --host 0.0.0.0 --port 8000 --reload
```

# License

Copyright © 2024 [ultrasev](https://github.com/ultrasev).
+This project is [MIT](LICENSE) licensed. diff --git a/api/__init__.py b/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/hello.py b/api/hello.py new file mode 100644 index 0000000..d28fc33 --- /dev/null +++ b/api/hello.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python +from fastapi.routing import APIRouter +router = APIRouter() + + +@router.get("/") +def read_root(): + return {"Hello": "World"} diff --git a/api/servers/__init__.py b/api/servers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/servers/base.py b/api/servers/base.py new file mode 100644 index 0000000..6d85e9f --- /dev/null +++ b/api/servers/base.py @@ -0,0 +1,31 @@ +from pydantic import BaseModel, Field +import httpx +import asyncio +from typing import List, Dict, Optional + + +class Message(BaseModel): + role: str + content: str + + +class OpenAIProxyArgs(BaseModel): + model: str + messages: List[Message] + stream: bool = False + temperature: float = Field(default=0.7, ge=0, le=2) + top_p: float = Field(default=1, ge=0, le=1) + n: int = Field(default=1, ge=1) + max_tokens: Optional[int] = None + presence_penalty: float = Field(default=0, ge=-2, le=2) + frequency_penalty: float = Field(default=0, ge=-2, le=2) + + +async def stream_openai_response(endpoint: str, payload: Dict, headers: Dict): + async with httpx.AsyncClient() as client: + async with client.stream("POST", endpoint, json=payload, headers=headers) as response: + async for line in response.aiter_lines(): + if line.startswith("data: "): + yield line + "\n\n" + elif line.strip() == "data: [DONE]": + break diff --git a/api/servers/gemini.py b/api/servers/gemini.py new file mode 100644 index 0000000..4689a99 --- /dev/null +++ b/api/servers/gemini.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python +''' Convert Gemini API to OpenAI API format + +Gemini API docs: +- https://ai.google.dev/gemini-api/docs/text-generation?lang=rest +''' +from loguru import logger +from pydantic import BaseModel +from fastapi import APIRouter, HTTPException, Header, Query +from fastapi.responses import JSONResponse, StreamingResponse +import httpx +import typing +from typing import List, Dict, Optional +from .base import Message +import time +import json +import re + +router = APIRouter() + + +GEMINI_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/{}:generateContent" +GEMINI_STREAM_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/{}:streamGenerateContent" + + +class OpenAIProxyArgs(BaseModel): + model: str + messages: List[Dict[str, str]] + stream: bool = False + temperature: float = 0.7 + top_p: float = 1 + n: int = 1 + max_tokens: Optional[int] = None + presence_penalty: float = 0 + frequency_penalty: float = 0 + + +class MessageConverter: + def __init__(self, messages: List[Dict[str, str]]): + self.messages = messages + + def convert(self) -> List[Dict[str, str]]: + converted_messages = [] + for message in self.messages: + role = "user" if message["role"] == "user" else "model" + converted_messages.append({ + "role": role, + "parts": [{"text": message["content"]}] + }) + return converted_messages + + +def convert_gemini_to_openai_response(gemini_response: dict, model: str) -> dict: + """Convert Gemini API response to OpenAI-compatible format.""" + return { + "id": gemini_response.get("candidates", [{}])[0].get("content", {}).get("role", ""), + "object": "chat.completion", + "created": int(time.time()), + "model": model, + "usage": { + "prompt_tokens": 0, # Gemini doesn't provide token counts + "completion_tokens": 
0, + "total_tokens": 0 + }, + "choices": [{ + "message": { + "role": "assistant", + "content": gemini_response.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "") + }, + "finish_reason": "stop", + "index": 0 + }] + } + + +async def stream_gemini_response(model: str, payload: dict, api_key: str): + text_pattern = re.compile(r'"text": "(.*?)"') + + async with httpx.AsyncClient() as client: + async with client.stream( + "POST", + GEMINI_STREAM_ENDPOINT.format(model), + json=payload, + headers={ + "Content-Type": "application/json", + "x-goog-api-key": api_key + } + ) as response: + async for line in response.aiter_lines(): + line = line.strip() + match = text_pattern.search(line) + if match: + text_content = match.group(1) + # Unescape any escaped characters + text_content = text_content.encode().decode('unicode_escape') + + openai_format = { + "id": f"chatcmpl-{int(time.time())}", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": model, + "choices": [{ + "index": 0, + "delta": { + "content": text_content + }, + "finish_reason": None + }] + } + + yield f"data: {json.dumps(openai_format)}\n\n" + + # Send a final chunk to indicate completion + final_chunk = { + "id": f"chatcmpl-{int(time.time())}", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": model, + "choices": [{ + "index": 0, + "delta": {}, + "finish_reason": "stop" + }] + } + yield f"data: {json.dumps(final_chunk)}\n\n" + yield "data: [DONE]\n\n" + + +@router.post("/chat/completions") +async def proxy_chat_completions( + args: OpenAIProxyArgs, + authorization: str = Header(...), +): + api_key = authorization.split(" ")[1] + model = args.model + + if not api_key: + raise HTTPException(status_code=400, detail="API key not provided") + + # Transform args into Gemini API format + gemini_payload = { + "contents": MessageConverter(args.messages).convert(), + "safetySettings": [ + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_ONLY_HIGH" + } + ], + "generationConfig": { + "temperature": args.temperature, + "maxOutputTokens": args.max_tokens, + "topP": args.top_p, + "topK": 10 + } + } + + if args.stream: + return StreamingResponse(stream_gemini_response(model, gemini_payload, api_key), media_type="text/event-stream") + else: + async with httpx.AsyncClient() as client: + response = await client.post( + GEMINI_ENDPOINT.format(model), + json=gemini_payload, + headers={ + "Content-Type": "application/json", + "x-goog-api-key": api_key + } + ) + logger.info(response.status_code) + + if response.status_code != 200: + return JSONResponse(content=response.json(), status_code=response.status_code) + + response_json = response.json() + + # Use the new conversion function + openai_compatible_response = convert_gemini_to_openai_response( + response_json, args.model) + + return JSONResponse(openai_compatible_response) diff --git a/api/servers/groq.py b/api/servers/groq.py new file mode 100644 index 0000000..2283799 --- /dev/null +++ b/api/servers/groq.py @@ -0,0 +1,34 @@ +from fastapi import APIRouter, Header, HTTPException +from fastapi.responses import JSONResponse, StreamingResponse +from pydantic import BaseModel, Field +import httpx +import asyncio +from typing import List, Dict, Optional +from .base import stream_openai_response, OpenAIProxyArgs, Message + +router = APIRouter() +GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions" + + +@router.post("/chat/completions") +async def proxy_chat_completions(args: OpenAIProxyArgs, 
authorization: str = Header(...)): + api_key = authorization.split(" ")[1] + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json" + } + payload = args.dict(exclude_none=True) + + if args.stream: + return StreamingResponse(stream_openai_response(GROQ_API_URL, payload, headers), media_type="text/event-stream") + else: + async with httpx.AsyncClient() as client: + try: + response = await client.post(GROQ_API_URL, json=payload, headers=headers) + response.raise_for_status() + return JSONResponse(response.json()) + except httpx.HTTPStatusError as e: + raise HTTPException( + status_code=e.response.status_code, detail=str(e.response.text)) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) diff --git a/api/servers/openai.py b/api/servers/openai.py new file mode 100644 index 0000000..d95b29d --- /dev/null +++ b/api/servers/openai.py @@ -0,0 +1,34 @@ +from fastapi import APIRouter, Header, HTTPException +from fastapi.responses import JSONResponse, StreamingResponse +from pydantic import BaseModel, Field +import httpx +import asyncio +from typing import List, Dict, Optional +from .base import stream_openai_response, OpenAIProxyArgs, Message + +router = APIRouter() +OPENAI_API_URL = "https://api.openai.com/v1/chat/completions" + + +@router.post("/chat/completions") +async def proxy_chat_completions(args: OpenAIProxyArgs, authorization: str = Header(...)): + api_key = authorization.split(" ")[1] + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json" + } + payload = args.dict(exclude_none=True) + + if args.stream: + return StreamingResponse(stream_openai_response(OPENAI_API_URL, payload, headers), media_type="text/event-stream") + else: + async with httpx.AsyncClient() as client: + try: + response = await client.post(OPENAI_API_URL, json=payload, headers=headers) + response.raise_for_status() + return JSONResponse(response.json()) + except httpx.HTTPStatusError as e: + raise HTTPException( + status_code=e.response.status_code, detail=str(e.response.text)) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) diff --git a/api/v1/__init__.py b/api/v1/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/main.py b/main.py new file mode 100644 index 0000000..6b742c4 --- /dev/null +++ b/main.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +from public.usage import USAGE as html +from api.hello import router as hello_router + +from fastapi import FastAPI +from fastapi.responses import Response +from api.servers.groq import router as groq_router +from api.servers.openai import router as openai_router +from api.servers.gemini import router as gemini_router + +app = FastAPI() + +app.include_router(hello_router, prefix="/hello") +app.include_router(groq_router, prefix="/groq") +app.include_router(openai_router, prefix="/openai") +app.include_router(gemini_router, prefix="/gemini") + + +@app.get("/") +def _root(): + return Response(content=html, media_type="text/html") diff --git a/package.json b/package.json new file mode 100644 index 0000000..de76041 --- /dev/null +++ b/package.json @@ -0,0 +1,5 @@ +{ + "engines": { + "node": "18.x" + } +} \ No newline at end of file diff --git a/public/__init__.py b/public/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/public/favicon.ico b/public/favicon.ico new file mode 100644 index 0000000..03dc0c9 Binary files /dev/null and b/public/favicon.ico differ diff --git a/public/usage.py b/public/usage.py new file mode 100644 
index 0000000..a913f15 --- /dev/null +++ b/public/usage.py @@ -0,0 +1,50 @@
#!/usr/bin/env python

# Minimal HTML landing page served by the root endpoint in main.py.
USAGE = """
<!DOCTYPE html>
<html>
<head>
  <title>Usage</title>
</head>
<body>
  <h1>success</h1>
  <h2>Usage</h2>
  <p>Visit the <a href="https://github.com/ultrasev/vercel-python-fastapi">Github doc</a> for more information.</p>
</body>
</html>
+ + + +""" diff --git a/public/vercel.png b/public/vercel.png new file mode 100644 index 0000000..fdb1f6f Binary files /dev/null and b/public/vercel.png differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5a69ad0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +fastapi==0.88.0 +pydantic~=1.10.4 +python-multipart==0.0.5 +expiringdict==1.2.2 +rich==13.4.2 +openai==1.6.1 +httpx==0.27.0 +loguru==0.7.2 \ No newline at end of file diff --git a/tests/test_async_api.py b/tests/test_async_api.py new file mode 100644 index 0000000..22c6ab3 --- /dev/null +++ b/tests/test_async_api.py @@ -0,0 +1,83 @@ +import asyncio +import pytest +import os +from dotenv import load_dotenv +from openai import AsyncOpenAI +import random +from loguru import logger +load_dotenv() + + +def api_endpoint(): + env = os.environ.get('ENV', 'development') + if env == 'production': + return "https://vercel-python-fastapi-chi.vercel.app/" + elif env == 'development': + return "http://192.168.31.46:3000" + else: + raise ValueError(f"Invalid environment: {env}") + + +BASE_URL = api_endpoint() +logger.info(f"BASE_URL: {BASE_URL}") + + +async def make_request(supplier: str, api_key: str, model: str): + BASE_URL = api_endpoint() + f"/{supplier}" + query = "Count from 1 to 5" + + client = AsyncOpenAI(base_url=BASE_URL, api_key=api_key) + + try: + stream = await client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": query}], + stream=True, + ) + + content = "" + async for chunk in stream: + delta_content = chunk.choices[0].delta.content + if delta_content: + content += delta_content + print(f"Received chunk: {delta_content}") # Debug print + + print(f"Full content: {content}") # Debug print + + if not content: + raise ValueError("Received empty content from API") + + for i in range(1, 6): + assert str( + i) in content, f"Expected {i} in content, but it's missing. 
Content: {content}" + + except Exception as e: + print(f"Error occurred: {str(e)}") + raise + + +@pytest.mark.asyncio +async def test_openai_streaming(): + await make_request( + supplier="openai", + api_key=os.environ["OPENAI_API_KEY"], + model="gpt-3.5-turbo" + ) + + +@pytest.mark.asyncio +async def test_groq_streaming(): + await make_request( + supplier="groq", + api_key=os.environ["GROQ_API_KEY"], + model="llama3-70b-8192" + ) + + +@pytest.mark.asyncio +async def test_gemini_streaming(): + await make_request( + supplier="gemini", + api_key=os.environ["GEMINI_API_KEY"], + model="gemini-1.5-flash" + ) diff --git a/tests/test_sync_api.py b/tests/test_sync_api.py new file mode 100644 index 0000000..be388c9 --- /dev/null +++ b/tests/test_sync_api.py @@ -0,0 +1,68 @@ +import asyncio +import pytest +import os +from dotenv import load_dotenv +from openai import AsyncOpenAI +import random +from loguru import logger +load_dotenv() + + +def api_endpoint(): + env = os.environ.get('ENV', 'development') + if env == 'production': + return "https://vercel-python-fastapi-chi.vercel.app/" + elif env == 'development': + return "http://192.168.31.46:3000" + else: + raise ValueError(f"Invalid environment: {env}") + + +BASE_URL = api_endpoint() +logger.info(f"BASE_URL: {BASE_URL}") + + +async def make_request(api_key: str, + model: str, + supplier: str, + query: str = "what is the result of 2*21"): + client = AsyncOpenAI(base_url=BASE_URL + f"/{supplier}", api_key=api_key) + response = await client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": "You are a helpful assistant。"}, + {"role": "user", "content": query} + ], + temperature=0.7, + top_p=1, + max_tokens=20 + ) + print(type(response), response) + return response.choices[0].message.content + + +@pytest.mark.asyncio +async def test_groq(): + await make_request( + supplier="groq", + api_key=os.environ["GROQ_API_KEY"], + model="llama3-70b-8192" + ) + + +@pytest.mark.asyncio +async def test_openai(): + await make_request( + supplier="openai", + api_key=os.environ["OPENAI_API_KEY"], + model="gpt-4o-mini" + ) + + +@pytest.mark.asyncio +async def test_gemini(): + await make_request( + supplier="gemini", + api_key=os.environ["GEMINI_API_KEY"], + model="gemini-1.5-flash" + ) diff --git a/vercel.json b/vercel.json new file mode 100644 index 0000000..6e08ab5 --- /dev/null +++ b/vercel.json @@ -0,0 +1,14 @@ +{ + "builds": [ + { + "src": "main.py", + "use": "@vercel/python" + } + ], + "routes": [ + { + "src": "/(.*)", + "dest": "main.py" + } + ] +} \ No newline at end of file
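For reference, the catch-all route in `vercel.json` sends every request to `main.py`, where FastAPI dispatches it by router prefix (`/hello`, `/groq`, `/openai`, `/gemini`). The sketch below shows what a raw call to a deployed instance looks like; the deployment URL and API key are placeholders, and the model name is the one used in the tests.

```python
import httpx

BASE_URL = "https://llmproxy.vercel.app"  # placeholder: your own Vercel deployment

# vercel.json routes every path to main.py; FastAPI then matches the /groq prefix
# and forwards the request to Groq's OpenAI-compatible endpoint.
resp = httpx.post(
    f"{BASE_URL}/groq/chat/completions",
    headers={"Authorization": "Bearer gsk_..."},  # placeholder Groq API key
    json={"model": "llama3-70b-8192",
          "messages": [{"role": "user", "content": "Count from 1 to 5"}]},
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])
```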