Skip to content

Commit

Permalink
feat: text-to-audio RestAPI
Browse files Browse the repository at this point in the history
  • Loading branch information
Simatwa committed Apr 23, 2024
2 parents c023e17 + 69109b1 commit 33d8a98
Show file tree
Hide file tree
Showing 6 changed files with 176 additions and 16 deletions.
3 changes: 2 additions & 1 deletion docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -509,4 +509,5 @@ For instance:

- feat: New text provider - [YepChat](https://yep.com)
- feat: New image provider [Prodia](https://prodia.com)
- feat: Speech synthesise responses. `--talk-to-me` shortform `-ttm`
- feat: Speech synthesise responses. `--talk-to-me` shortform `-ttm`
- feat: Speech synthesise - **FastAPI** - `/audio`
38 changes: 37 additions & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -484,13 +484,49 @@ The environment variables can be overridden by explicitly declaring new value.
> Save the variables in a `.env` file in your current directory or export them in your `~/.zshrc` file.
> To load previous conversations from a `.txt` file, use the `-fp` or `--filepath` flag. If no flag is passed, the default one will be used. To load context from a file without altering its content, use the `--retain-file` flag.
## Dynamic Provider
## Dynamic Provider & Further Interfaces

Version **0.4.6** also introduces a dynamic provider called `g4fauto`, which represents the fastest working g4f-based provider.

> [!TIP]
> To launch web interface for g4f-based providers simply run `$ pytgpt gpt4free gui`.
> `$ pytgpt api run` will start the REST-API. Access docs and redoc at */docs* and */redoc* respectively.

To launch the web interface for g4f-based providers, execute the following command in your terminal:

```bash
$ pytgpt gpt4free gui
```

This command initializes the Web-user interface for interacting with g4f-based providers.

To start the REST-API:

```bash
$ pytgpt api run
```

This command starts the RESTful API server, enabling you to interact with the service programmatically.

For accessing the documentation and redoc, navigate to the following paths in your web browser:
- Documentation: `/docs`
- ReDoc: `/redoc`

## Speech Synthesis

To enable speech synthesis of responses, ensure you have either the [VLC player](https://www.videolan.org/vlc/index.html) installed on your system or, if you are a [Termux](https://termux.org) user, the [Termux:API](https://wiki.termux.com/wiki/Termux:API) package.

To activate speech synthesis, use the `--talk-to-me` flag or its shorthand `-ttm` when running your commands. For example:
```bash
$ pytgpt generate "Generate an ogre story" --talk-to-me
```
or
```bash
$ pytgpt interactive -ttm
```
This flag instructs the system to vocalize the AI responses and then play them, enhancing the user experience by providing auditory feedback.

> To speech-synthesise the responses, ensure you have [VLC player](https://www.videolan.org/vlc/index.html) installed on your system, or [termux-api](https://wiki.termux.com/wiki/Termux:API) in the case of [Termux](https://termux.org) users.
> Use the flag `--talk-to-me`, shortform `-ttm`.
Expand Down
7 changes: 5 additions & 2 deletions src/pytgpt/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@
title="python-tgpt",
summary="Interact with AI without API key",
description=(
"For **text** and **image** generation."
"Full documentation available at official repo : [Simatwa/python-tgpt](https://github.com/Simatwa/python-tgpt)."
"For **text** generation, **text-to-image** and **text-to-audio** conversions."
"\n\n"
"Access redoc at [/redoc](/redoc) endpoint."
"\n\n"
"Full documentation is available at official repo : [Simatwa/python-tgpt](https://github.com/Simatwa/python-tgpt)."
),
version=__version__,
contact={
Expand Down
4 changes: 2 additions & 2 deletions src/pytgpt/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ async def decorator(*args, **kwargs):
except (ProxyError, InvalidProxyURL, SSLError) as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Proxy related error. {get_exception_string(e)}",
detail=dict(message=f"Proxy related error. {get_exception_string(e)}"),
)
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=get_exception_string(e),
detail=dict(message=get_exception_string(e)),
)

return decorator
132 changes: 123 additions & 9 deletions src/pytgpt/api/v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pytgpt.auto import AUTO
from pytgpt.imager import Imager
from pytgpt.imager import Prodia
from pytgpt.utils import Audio
from pytgpt.utils import api_static_image_dir

provider_map = {
Expand Down Expand Up @@ -62,7 +63,7 @@ class ProvidersModel(BaseModel):
}


class UserPayload(BaseModel):
class TextGenerationPayload(BaseModel):
prompt: str
provider: str = "auto"
# is_conversation: bool = False
Expand Down Expand Up @@ -94,7 +95,9 @@ def validate_provider(provider: str) -> object:
if provider not in supported_providers:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Provider '{provider}' is not one of [{', '.join(supported_providers)}]",
detail=dict(
message=f"Provider '{provider}' is not one of [{', '.join(supported_providers)}]",
),
)
return provider

Expand Down Expand Up @@ -177,7 +180,9 @@ def validate_amount(amount: int) -> PositiveInt:
if amount > 10:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Amount {amount} is out of range : 1-10",
detail=dict(
message=f"Amount {amount} is out of range : 1-10",
),
)
return amount

Expand All @@ -186,7 +191,9 @@ def validate_provider(provider: Union[str, None]) -> str:
if provider not in image_providers:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Image provider '{provider}' is not one of [{', '.join(list(image_providers.keys()))}]",
detail=dict(
message=f"Image provider '{provider}' is not one of [{', '.join(list(image_providers.keys()))}]",
),
)
return "default" if provider is None else provider

Expand Down Expand Up @@ -219,7 +226,9 @@ def validate_provider(provider: Union[str, None]) -> str:
if provider not in image_providers:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Image provider '{provider}' is not one of [{', '.join(list(image_providers.keys()))}]",
detail=dict(
message=f"Image provider '{provider}' is not one of [{', '.join(list(image_providers.keys()))}]",
),
)
return "default" if provider is None else provider

Expand Down Expand Up @@ -249,7 +258,54 @@ class ImageResponse(BaseModel):
}


def init_provider(payload: UserPayload) -> object:
class TextToAudioPayload(BaseModel):
    """
    - `message` : Text to be synthesised.
    - `voice` : The voice to use for speech synthesis.
    - `proxy` : Http request proxies, keyed by scheme.
    - `timeout` : Http request timeout in seconds.
    """

    message: str
    voice: Union[str, None] = "Brian"
    proxy: Union[dict[str, str], None] = None
    timeout: int = 30
    model_config = {
        "json_schema_extra": {
            "example": {
                "message": "There is a place for people like you.",
                "voice": "Brian",
                "proxy": {
                    "http": "socks4://199.229.254.129:4145",
                    "https": "socks4://199.229.254.129:4145",
                },
                "timeout": 30,
            }
        }
    }

    @validator("voice")
    def validate_voice(voice) -> str:
        # The field is declared nullable, so an explicit null must fall back
        # to the default voice. Checking membership first would reject it and
        # leave the fallback unreachable.
        if voice is None:
            return "Brian"
        if voice not in Audio.all_voices:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=dict(
                    message=f"Voice '{voice}' is not one of [{', '.join(Audio.all_voices)}]"
                ),
            )
        return voice


class TextToAudioResponse(BaseModel):
    """
    - `url` : Link to generated audio file.
    """

    url: str

    # Example shown in the generated API schema; the URL must not carry
    # leading whitespace or it is not usable when copied from the docs.
    model_config = {
        "json_schema_extra": {
            "example": {
                "url": "http://localhost:8000/static/audios/f9d4233f-9b78-4d87-bc27-5d2ab928f673.mp3",
            }
        }
    }


def init_provider(payload: TextGenerationPayload) -> object:
return provider_map.get(payload.provider, GPT4FREE)(
is_conversation=False, # payload.is_conversation,
max_tokens=payload.max_tokens,
Expand All @@ -274,7 +330,7 @@ async def llm_providers() -> ProvidersModel:

@app.post("/chat/nostream", name="no-stream")
@api_exception_handler
async def non_stream(payload: UserPayload) -> ProviderResponse:
async def non_stream(payload: TextGenerationPayload) -> ProviderResponse:
"""No response streaming.
- `prompt` : User query.
Expand All @@ -301,7 +357,7 @@ async def non_stream(payload: UserPayload) -> ProviderResponse:
)


def generate_streaming_response(payload: UserPayload) -> Generator:
def generate_streaming_response(payload: TextGenerationPayload) -> Generator:
provider_obj: LEO = init_provider(payload)

for text in provider_obj.chat(payload.prompt, stream=True):
Expand All @@ -319,7 +375,7 @@ def generate_streaming_response(payload: UserPayload) -> Generator:

@app.post("/chat/stream", name="stream", response_model=ProviderResponse)
@api_exception_handler
async def stream(payload: UserPayload) -> Any:
async def stream(payload: TextGenerationPayload) -> Any:
"""Stream back response as received.
- `prompt` : User query.
Expand Down Expand Up @@ -436,3 +492,61 @@ async def redirect_image_generation(prompt: str):
return RedirectResponse(
f"https://image.pollinations.ai/prompt/{prompt}",
)


@app.post("/audio", name="text-to-audio")
@api_exception_handler
async def text_to_audio(
    payload: TextToAudioPayload, request: Request
) -> TextToAudioResponse:
    """Vocalize text

    - `message` : Text to be synthesised.
    - `voice` : The voice to use for speech synthesis.
    - `timeout` : Http request timeout in seconds.
    - `proxy` : Http request proxy.
    **NOTE** : *Ensure `proxy` value is correct otherwise make it `null`*
    """
    import asyncio

    host = f"{request.url.scheme}://{request.url.netloc}"
    filename = f"{uuid4()}.mp3"
    # Audio.text_to_audio performs a blocking HTTP request; run it in a worker
    # thread so this coroutine does not stall the event loop while waiting.
    await asyncio.to_thread(
        Audio.text_to_audio,
        message=payload.message,
        voice=payload.voice,
        proxies=payload.proxy,
        timeout=payload.timeout,
        save_to=Audio.cache_dir.joinpath(filename).as_posix(),
    )
    return TextToAudioResponse(url=f"{host}/static/audios/{filename}")


@app.get("/audio", name="text-to-audio (bytes)")
@api_exception_handler
async def text_to_audio_bytes(
    message: str,
    voice: str = "Brian",
    timeout: int = 30,
    proxy: Union[str, None] = None,
):
    """Return raw audio

    - `message` : Text to be synthesised.
    - `voice` : The voice to use for speech synthesis. Unknown voices fall back to `Brian`.
    - `timeout` : Http request timeout in seconds.
    - `proxy` : Http request proxy.
    **NOTE** : *Ensure `proxy` value is correct otherwise omit it*
    """
    import asyncio

    # Audio.text_to_audio performs a blocking HTTP request; run it in a worker
    # thread so this coroutine does not stall the event loop while waiting.
    audio_bytes = await asyncio.to_thread(
        Audio.text_to_audio,
        message=message,
        voice=voice if voice in Audio.all_voices else "Brian",
        # The single proxy URL is applied to the https scheme only.
        proxies={"https": proxy} if proxy else {},
        timeout=timeout,
    )
    return Response(
        content=audio_bytes,
        media_type="audio/mpeg",
        headers={"Content-Disposition": f"attachment; filename={uuid4()}.mp3"},
    )
8 changes: 7 additions & 1 deletion src/pytgpt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,8 @@ def text_to_audio(
cls,
message: str,
voice: str = "Brian",
proxies: dict[str, str] = {},
timeout: int = 30,
save_to: Union[Path, str] = None,
auto: bool = False,
) -> Union[str, bytes]:
Expand All @@ -899,6 +901,8 @@ def text_to_audio(
Parameters:
message (str): The text to convert to speech
voice (str, optional): The voice to use for speech synthesis. Defaults to "Brian".
proxies (dict, optional): Http request proxies. Default to {}.
timeout (int, optional): Http request timeout. Defaults to 30.
save_to (bool, optional): Path to save the audio file. Defaults to None.
auto (bool, optional): Generate filename for the contents based on `message` and save to `cls.cache_dir`. Defaults to False.
Expand All @@ -912,7 +916,9 @@ def text_to_audio(
url: str = (
f"https://api.streamelements.com/kappa/v2/speech?voice={voice}&text={{{message}}}"
)
resp = requests.get(url=url, headers=cls.headers, stream=True)
resp = requests.get(
url=url, headers=cls.headers, stream=True, proxies=proxies, timeout=timeout
)
if not resp.ok:
raise Exception(
f"Failed to perform the operation - ({resp.status_code}, {resp.reason}) - {resp.text}"
Expand Down

0 comments on commit 33d8a98

Please sign in to comment.