-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaudio.py
92 lines (80 loc) · 3.12 KB
/
audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from pydub import AudioSegment
from pydub.playback import play
import io
import requests
import time
import subprocess
# Silent pydub segment; not referenced anywhere in the visible part of this file.
# NOTE(review): the name says "one second" but duration=500 — pydub durations
# are presumably milliseconds, which would make this half a second. Confirm
# which of the name or the value is intended before relying on it.
one_second_silence = AudioSegment.silent(duration=500)
# Scheme + host that the functions below prepend to every Eleven Labs endpoint path.
eleven_labs_base_url = "https://api.elevenlabs.io"
def edit_voice_settings(api_key: str, settings: object):
    """
    Send a voice's stability / similarity_boost values to Eleven Labs.

    Parameters:
    api_key (str): Eleven Labs API Key
    settings (obj): voice_settings; read through the keys 'id',
        'stability' and 'similarity_boost'
    return: HTTP status code of the edit request
    """
    # The voice id selects which voice's settings endpoint is targeted.
    endpoint_path = "/v1/voices/" + settings['id'] + "/settings/edit"
    # Only these two fields are forwarded by this call.
    payload = {field: settings[field] for field in ("stability", "similarity_boost")}
    return post_request(eleven_labs_base_url + endpoint_path, api_key, payload).status_code
def generate_voice(api_key: str, settings: object, text: str):
    """
    Generate and play voice from text

    The audio is requested from the streaming text-to-speech endpoint and each
    chunk is piped to an ``ffplay`` subprocess as it arrives, so playback can
    begin before the whole clip has been generated.

    Parameters:
    api_key (str): Eleven Labs API Key
    settings (obj): voice_settings; read through the keys 'id', 'stability',
        'similarity_boost', 'style' and 'use_speaker_boost'
    text (str) : Text to generate voice from
    return: None. An HTTP error status is printed rather than raised; other
        failures (connection errors, ffplay missing, broken pipe) propagate.
    """
    eleven_labs_url = (
        eleven_labs_base_url
        + "/v1/text-to-speech/"
        + settings['id']
        + "/stream?optimize_streaming_latency=2&output_format=mp3_44100_128"
    )
    # perf_counter is monotonic, so the latency printed below cannot be skewed
    # by wall-clock adjustments.
    start = time.perf_counter()
    # eleven_monolingual_v1 is chosen here because it is the faster model.
    # NOTE(review): the previous comment said eleven_multilingual_v2 "might be
    # faster" as well — presumably "better quality" was meant; confirm.
    payload = {
        "text": text,
        "model_id": "eleven_monolingual_v1",
        "language_id": "english",
        "voice_settings": {
            "stability": settings['stability'],
            "similarity_boost": settings['similarity_boost'],
            "style": settings['style'],
            "use_speaker_boost": settings['use_speaker_boost'],
        },
    }
    # stream=True defers the body download; without it requests fetches the
    # entire MP3 before returning and iter_content merely re-slices a buffer
    # already in memory, so nothing is actually streamed.
    response = post_request(eleven_labs_url, api_key, payload, stream=True)
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print("Eleven Labs API error: " + str(err))
    else:
        ffplay_cmd = ['ffplay', '-autoexit', '-nodisp', '-']
        # The context manager closes ffplay's stdin and waits for the process
        # even when a write fails, so no zombie process or open pipe is left.
        with subprocess.Popen(ffplay_cmd, stdin=subprocess.PIPE) as ffplay_proc:
            is_audio_received = False
            for chunk in response.iter_content(chunk_size=4096):
                if not chunk:
                    # Nothing to play in an empty chunk.
                    continue
                if not is_audio_received:
                    # Time until the first audio bytes arrived.
                    end = time.perf_counter()
                    print("audio generation(secs):" + str(end - start))
                    is_audio_received = True
                ffplay_proc.stdin.write(chunk)
    finally:
        # A streamed response holds its connection until it is closed.
        response.close()


def post_request(url, api_key, json, stream=False):
    """
    Helper function for post

    Parameters:
    url (str) : Target Endpoint
    api_key (str): Eleven Labs API Key
    json (obj): json object for POST
    stream (bool): forwarded to requests.post; when True the response body is
        not downloaded until the caller reads it, and the caller should close
        the response when done. Defaults to False (body fetched eagerly).
    return: HTTP response
    """
    headers = {
        "accept": "*/*",
        "xi-api-key": api_key,
        "Content-Type": "application/json",
    }
    # NOTE(review): no timeout is passed, so a stalled server can block this
    # call indefinitely — consider whether callers need one.
    return requests.post(url, headers=headers, json=json, stream=stream)
# text= "My major works include twenty-one Nocturnes, four Scherzos, four Ballades, three Sonatas, two Concertos (No.1 in E minor and No.2 in F minor), twenty-four Preludes, one Fantasie-Impromptu Op. 66 and many others including smaller pieces for solo piano such as Mazurkas, Waltzes and Polonaises. My most popular works are often considered to be the Nocturnes and the Ballades due to their beautiful melodies and emotional depth. I also composed a number of chamber pieces for various ensembles such as string quartets and trios as well as vocal pieces with piano accompaniment"
# generate_voice(api_key, settings, text)