Skip to content

Commit

Permalink
TTS to accept raw & STT to use async generators (#419)
Browse files Browse the repository at this point in the history
* STT to accept an async generator

* Allow getting RAW data from TTS

* AsyncIterable

* Cleanup

* Fix test
  • Loading branch information
balloob authored Mar 22, 2023
1 parent 261aa6c commit 0836155
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 9 deletions.
26 changes: 18 additions & 8 deletions hass_nabucasa/voice.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@

from datetime import datetime
from enum import Enum
import logging
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, AsyncIterable
import xml.etree.ElementTree as ET

import aiohttp
from aiohttp.hdrs import ACCEPT, AUTHORIZATION, CONTENT_TYPE
import attr

Expand All @@ -17,8 +15,6 @@
if TYPE_CHECKING:
from . import Cloud

_LOGGER = logging.getLogger(__name__)


class VoiceError(Exception):
"""General Voice error."""
Expand All @@ -39,6 +35,13 @@ class Gender(str, Enum):
FEMALE = "female"


class AudioOutput(str, Enum):
"""Audio output format requested from text-to-speech."""

# process_tts sends "audio-24khz-48kbitrate-mono-mp3" as the output format.
MP3 = "mp3"
# process_tts sends "raw-16khz-16bit-mono-pcm" as the output format.
RAW = "raw"


MAP_VOICE = {
("af-ZA", Gender.FEMALE): "AdriNeural",
("af-ZA", Gender.MALE): "WillemNeural",
Expand Down Expand Up @@ -339,7 +342,7 @@ async def _update_token(self) -> None:
self._valid = utc_from_timestamp(float(data["valid"]))

async def process_stt(
self, stream: aiohttp.StreamReader, content: str, language: str
self, stream: AsyncIterable[bytes], content: str, language: str
) -> STTResponse:
"""Stream Audio to Azure cognitive instance."""
if not self._validate_token():
Expand All @@ -366,7 +369,9 @@ async def process_stt(
data["RecognitionStatus"] == "Success", data.get("DisplayText")
)

async def process_tts(self, text: str, language: str, gender: Gender) -> bytes:
async def process_tts(
self, text: str, language: str, gender: Gender, output: AudioOutput
) -> bytes:
"""Get Speech from text over Azure."""
if not self._validate_token():
await self._update_token()
Expand All @@ -385,13 +390,18 @@ async def process_tts(self, text: str, language: str, gender: Gender) -> bytes:
# We cannot get here without this being set, but mypy does not know that.
assert self._endpoint_tts is not None

if output == AudioOutput.RAW:
output_header = "raw-16khz-16bit-mono-pcm"
else:
output_header = "audio-24khz-48kbitrate-mono-mp3"

# Send request
async with self.cloud.websession.post(
self._endpoint_tts,
headers={
CONTENT_TYPE: "application/ssml+xml",
AUTHORIZATION: f"Bearer {self._token}",
"X-Microsoft-OutputFormat": "audio-24khz-48kbitrate-mono-mp3",
"X-Microsoft-OutputFormat": output_header,
},
data=ET.tostring(xml_body),
) as resp:
Expand Down
7 changes: 6 additions & 1 deletion tests/test_voice.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,12 @@ async def test_process_tts(auth_cloud_mock, aioclient_mock):
content=b"My sound",
)
result = await voice_api.process_tts(
"Text for Saying", "en-US", voice.Gender.FEMALE
"Text for Saying", "en-US", voice.Gender.FEMALE, voice.AudioOutput.MP3
)

assert result == b"My sound"
assert aioclient_mock.mock_calls[1][3] == {
"Authorization": "Bearer test-key",
"Content-Type": "application/ssml+xml",
"X-Microsoft-OutputFormat": "audio-24khz-48kbitrate-mono-mp3",
}

0 comments on commit 0836155

Please sign in to comment.