feat: text-to-audio RestAPI

Simatwa · Apr 23, 2024 · 33d8a98 · 33d8a98
2 parents c023e17 + 69109b1
commit 33d8a98
Show file tree

Hide file tree

Showing 6 changed files with 176 additions and 16 deletions.
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -509,4 +509,5 @@ For instance:
 
 - feat: New text provider - [YepChat](https://yep.com)
 - feat: New image provider [Prodia](prodia.com)
-- feat: Speech synthesise responses. `--talk-to-me` shortform  `-ttm`
+- feat: Speech synthesise responses. `--talk-to-me` shortform  `-ttm`
+- feat: Speech synthesise - **FastAPI** - `/audio`
diff --git a/docs/README.md b/docs/README.md
@@ -484,13 +484,49 @@ The environment variables can be overridden by explicitly declaring new value.
 > Save the variables in a `.env` file in your current directory or export them in your `~/.zshrc` file.
 > To load previous conversations from a `.txt` file, use the `-fp` or `--filepath` flag. If no flag is passed, the default one will be used. To load context from a file without altering its content, use the `--retain-file` flag.
 
-## Dynamic Provider
+## Dynamic Provider & Further Interfaces
 
 Version **0.4.6** also introduces dynamic provider called `g4fauto`, which represents the fastest working g4f-based provider.
 
+
 > [!TIP]
 > To launch web interface for g4f-based providers simply run `$ pytgpt gpt4free gui`.
 > `$ pytgpt api run` will start the REST-API. Access docs and redoc at */docs* and */redoc* respectively.
+
+To launch the web interface for g4f-based providers, execute the following command in your terminal:
+
+```bash
+$ pytgpt gpt4free gui
+```
+
+This command initializes the web user interface for interacting with g4f-based providers.
+
+To start the REST-API:
+
+```bash
+$ pytgpt api run
+```
+
+This command starts the RESTful API server, enabling you to interact with the service programmatically.
+
+For accessing the documentation and redoc, navigate to the following paths in your web browser:
+- Documentation: `/docs`
+- ReDoc: `/redoc`
+
+## Speech Synthesis
+
+To enable speech synthesis of responses, ensure you have either the [VLC player](https://www.videolan.org/vlc/index.html) installed on your system or, if you are a [Termux](https://termux.org) user, the [Termux:API](https://wiki.termux.com/wiki/Termux:API) package.
+
+To activate speech synthesis, use the `--talk-to-me` flag or its shorthand `-ttm` when running your commands. For example:
+```bash
+$ pytgpt generate "Generate an ogre story" --talk-to-me
+```
+or
+```bash
+$ pytgpt interactive -ttm
+```
+This flag instructs the system to vocalize the AI responses and then play them, enhancing the user experience by providing auditory feedback.
+
 
 > To speech synthesise the responses just ensure you have [vlc player]() in your system or [termux-api](https://wiki.termux.com/wiki/Termux:API) for the case of [Termux](https://termux.org) users.
 > Use the flag `--talk-to-me`, shortform `-ttm`.

diff --git a/src/pytgpt/api/__init__.py b/src/pytgpt/api/__init__.py
@@ -15,8 +15,11 @@
     title="python-tgpt",
     summary="Interact with AI without API key",
     description=(
-        "For **text** and **image** generation."
-        "Full documentation available at official repo : [Simatwa/python-tgpt](https://github.com/Simatwa/python-tgpt)."
+        "For **text** generation, **text-to-image** and **text-to-audio** conversions."
+        "\n\n"
+        "Access redoc at [/redoc](/redoc) endpoint."
+        "\n\n"
+        "Full documentation is available at official repo : [Simatwa/python-tgpt](https://github.com/Simatwa/python-tgpt)."
     ),
     version=__version__,
     contact={

diff --git a/src/pytgpt/api/utils.py b/src/pytgpt/api/utils.py
@@ -23,12 +23,12 @@ async def decorator(*args, **kwargs):
         except (ProxyError, InvalidProxyURL, SSLError) as e:
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail=f"Proxy related error. {get_exception_string(e)}",
+                detail=dict(message=f"Proxy related error. {get_exception_string(e)}"),
             )
         except Exception as e:
             raise HTTPException(
                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-                detail=get_exception_string(e),
+                detail=dict(message=get_exception_string(e)),
             )
 
     return decorator
diff --git a/src/pytgpt/api/v1.py b/src/pytgpt/api/v1.py
@@ -21,6 +21,7 @@
 from pytgpt.auto import AUTO
 from pytgpt.imager import Imager
 from pytgpt.imager import Prodia
+from pytgpt.utils import Audio
 from pytgpt.utils import api_static_image_dir
 
 provider_map = {
@@ -62,7 +63,7 @@ class ProvidersModel(BaseModel):
     }
 
 
-class UserPayload(BaseModel):
+class TextGenerationPayload(BaseModel):
     prompt: str
     provider: str = "auto"
     # is_conversation: bool = False
@@ -94,7 +95,9 @@ def validate_provider(provider: str) -> object:
         if provider not in supported_providers:
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail=f"Provider '{provider}' is not one of [{', '.join(supported_providers)}]",
+                detail=dict(
+                    message=f"Provider '{provider}' is not one of [{', '.join(supported_providers)}]",
+                ),
             )
         return provider
 
@@ -177,7 +180,9 @@ def validate_amount(amount: int) -> PositiveInt:
         if amount > 10:
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail=f"Amount {amount} is out of range : 1-10",
+                detail=dict(
+                    message=f"Amount {amount} is out of range : 1-10",
+                ),
             )
         return amount
 
@@ -186,7 +191,9 @@ def validate_provider(provider: Union[str, None]) -> str:
         if provider not in image_providers:
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail=f"Image provider '{provider}' is not one of [{', '.join(list(image_providers.keys()))}]",
+                detail=dict(
+                    message=f"Image provider '{provider}' is not one of [{', '.join(list(image_providers.keys()))}]",
+                ),
             )
         return "default" if provider is None else provider
 
@@ -219,7 +226,9 @@ def validate_provider(provider: Union[str, None]) -> str:
         if provider not in image_providers:
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail=f"Image provider '{provider}' is not one of [{', '.join(list(image_providers.keys()))}]",
+                detail=dict(
+                    message=f"Image provider '{provider}' is not one of [{', '.join(list(image_providers.keys()))}]",
+                ),
             )
         return "default" if provider is None else provider
 
@@ -249,7 +258,54 @@ class ImageResponse(BaseModel):
     }
 
 
-def init_provider(payload: UserPayload) -> object:
+class TextToAudioPayload(BaseModel):  # Request body accepted by POST `/audio`.
+    message: str  # Text to be synthesised.
+    voice: Union[str, None] = "Brian"  # Speech-synthesis voice; checked by `validate_voice`.
+    proxy: Union[dict[str, str], None] = None  # Http request proxies keyed by scheme, or null.
+    timeout: int = 30  # Http request timeout in seconds.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "message": "There is a place for people like you.",
+                "voice": "Brian",
+                "proxy": {
+                    "http": "socks4://199.229.254.129:4145",
+                    "https": "socks4://199.229.254.129:4145",
+                },
+                "timeout": 30,
+            }
+        }
+    }
+
+    @validator("voice")
+    def validate_voice(voice) -> str:
+        if voice and voice not in Audio.all_voices:  # empty/None falls through to the default
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=dict(
+                    message=f"Voice '{voice}' is not one of [{', '.join(Audio.all_voices)}]"
+                ),
+            )
+        return voice or "Brian"  # default voice when none was supplied
+
+
+class TextToAudioResponse(BaseModel):
+    """
+    - `url` : Link to generated audio file.
+    """
+
+    url: str  # Link built by the POST `/audio` handler: `<host>/static/audios/<uuid>.mp3`.
+
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "url": "http://localhost:8000/static/audios/f9d4233f-9b78-4d87-bc27-5d2ab928f673.mp3",
+            }
+        }
+    }
+
+
+def init_provider(payload: TextGenerationPayload) -> object:
     return provider_map.get(payload.provider, GPT4FREE)(
         is_conversation=False,  # payload.is_conversation,
         max_tokens=payload.max_tokens,
@@ -274,7 +330,7 @@ async def llm_providers() -> ProvidersModel:
 
 @app.post("/chat/nostream", name="no-stream")
 @api_exception_handler
-async def non_stream(payload: UserPayload) -> ProviderResponse:
+async def non_stream(payload: TextGenerationPayload) -> ProviderResponse:
     """No response streaming.
 
     - `prompt` : User query.
@@ -301,7 +357,7 @@ async def non_stream(payload: UserPayload) -> ProviderResponse:
     )
 
 
-def generate_streaming_response(payload: UserPayload) -> Generator:
+def generate_streaming_response(payload: TextGenerationPayload) -> Generator:
     provider_obj: LEO = init_provider(payload)
 
     for text in provider_obj.chat(payload.prompt, stream=True):
@@ -319,7 +375,7 @@ def generate_streaming_response(payload: UserPayload) -> Generator:
 
 @app.post("/chat/stream", name="stream", response_model=ProviderResponse)
 @api_exception_handler
-async def stream(payload: UserPayload) -> Any:
+async def stream(payload: TextGenerationPayload) -> Any:
     """Stream back response as received.
 
     - `prompt` : User query.
@@ -436,3 +492,61 @@ async def redirect_image_generation(prompt: str):
     return RedirectResponse(
         f"https://image.pollinations.ai/prompt/{prompt}",
     )
+
+
+@app.post("/audio", name="text-to-audio")
+@api_exception_handler
+async def text_to_audio(
+    payload: TextToAudioPayload, request: Request
+) -> TextToAudioResponse:
+    """Vocalize text
+
+    - `message` : Text to be synthesised.
+    - `voice` :  The voice to use for speech synthesis.
+    - `timeout` : Http request timeout in seconds.
+    - `proxy` : Http request proxy.
+
+    **NOTE** : *Ensure `proxy` value is correct otherwise make it `null`*
+    """
+    base_url = f"{request.url.scheme}://{request.url.netloc}"  # origin the client used
+    audio_name = f"{uuid4()}.mp3"  # random, collision-free file name
+    Audio.text_to_audio(
+        message=payload.message,
+        voice=payload.voice,
+        proxies=payload.proxy,
+        timeout=payload.timeout,
+        save_to=Audio.cache_dir.joinpath(audio_name).as_posix(),
+    )
+    return TextToAudioResponse(url=f"{base_url}/static/audios/{audio_name}")
+
+
+@app.get("/audio", name="text-to-audio (bytes)")
+@api_exception_handler
+async def text_to_audio_bytes(
+    message: str,
+    voice: str = "Brian",
+    timeout: int = 30,
+    proxy: Union[str, None] = None,
+):
+    """Return raw audio
+
+    - `message` : Text to be synthesised.
+    - `voice` :  The voice to use for speech synthesis.
+    - `timeout` : Http request timeout in seconds.
+    - `proxy` : Http request proxy.
+
+    **NOTE** : *Ensure `proxy` value is correct otherwise make it `null`*
+    """
+    chosen_voice = voice if voice in Audio.all_voices else "Brian"  # unknown voice -> default
+    audio_bytes = Audio.text_to_audio(
+        message=message,
+        voice=chosen_voice,
+        proxies={"https": proxy} if proxy else {},
+        timeout=timeout,
+    )
+    download_name = f"{uuid4()}.mp3"  # random file name offered to the client
+    return Response(
+        content=audio_bytes,
+        media_type="audio/mpeg",
+        headers={"Content-Disposition": f"attachment; filename={download_name}"},
+    )
diff --git a/src/pytgpt/utils.py b/src/pytgpt/utils.py
@@ -890,6 +890,8 @@ def text_to_audio(
         cls,
         message: str,
         voice: str = "Brian",
+        proxies: dict[str, str] = {},
+        timeout: int = 30,
         save_to: Union[Path, str] = None,
         auto: bool = False,
     ) -> Union[str, bytes]:
@@ -899,6 +901,8 @@ def text_to_audio(
         Parameters:
             message (str): The text to convert to speech
             voice (str, optional): The voice to use for speech synthesis. Defaults to "Brian".
+            proxies (dict, optional): Http request proxies. Default to {}.
+            timeout (int, optional): Http request timeout. Defaults to 30.
             save_to (bool, optional): Path to save the audio file. Defaults to None.
             auto (bool, optional): Generate filename for the contents based on `message` and save to `cls.cache_dir`. Defaults to False.
 
@@ -912,7 +916,9 @@ def text_to_audio(
         url: str = (
             f"https://api.streamelements.com/kappa/v2/speech?voice={voice}&text={{{message}}}"
         )
-        resp = requests.get(url=url, headers=cls.headers, stream=True)
+        resp = requests.get(
+            url=url, headers=cls.headers, stream=True, proxies=proxies, timeout=timeout
+        )
         if not resp.ok:
             raise Exception(
                 f"Failed to perform the operation - ({resp.status_code}, {resp.reason}) - {resp.text}"