-
-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathaudio.py
182 lines (135 loc) · 5.15 KB
/
audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import wave
import pyaudio
import configparser
import librosa
import soundfile as sf
from pydub import AudioSegment
# Audio capture parameters, chosen for Whisper input.
# These are shared by the recording and WAV-saving functions below.
FORMAT = pyaudio.paInt16 # 16-bit signed integer samples
CHANNELS = 1 # Mono
RATE = 16000 # Sample rate in Hz (16 kHz)
CHUNK = 1024 # Frames read per buffer; smaller chunks might reduce latency
RECORD_SECONDS = 30 # 30 seconds to match Whisper's expected segment length
def print_input_devices():
    """Print the ID and name of every input-capable device on host API 0.

    Only devices reporting at least one input channel are listed. The ID
    shown is the device's position within host API 0.
    """
    p = pyaudio.PyAudio()
    try:
        info = p.get_host_api_info_by_index(0)
        numdevices = info.get('deviceCount')
        print("Input devices:")
        for i in range(numdevices):
            # Query each device once and reuse the result for both the
            # channel check and the name.
            device_info = p.get_device_info_by_host_api_device_index(0, i)
            if device_info.get('maxInputChannels') > 0:
                print("Input Device ID ", i, " - ", device_info.get('name'))
    finally:
        # Always release PortAudio, even if a device query fails.
        p.terminate()
def print_output_devices():
    """Print index, name and channel count of every output-capable device."""
    pa = pyaudio.PyAudio()
    total = pa.get_device_count()

    print("Output devices:")
    for index in range(total):
        info = pa.get_device_info_by_index(index)
        # Skip devices that cannot play audio (no output channels).
        if not info['maxOutputChannels'] > 0:
            continue
        print(f"Index: {index}, Name: {info['name']}, Channels: {info['maxOutputChannels']}")

    # Release the PyAudio instance once the listing is complete.
    pa.terminate()
def find_output_device_index():
    """Return the index of the output device named in settings.ini.

    Reads ``device_output_name`` from the ``[Audio Settings]`` section of
    ``settings.ini`` (looked up relative to the current working directory)
    and returns the index of the first device whose name contains that text
    and that reports at least one output channel.

    Returns:
        The PyAudio device index of the first matching output device.

    Raises:
        AssertionError: If no output-capable device matches the configured
            name.
        configparser.Error: If the section or option is missing (this also
            happens when settings.ini cannot be found, because
            ``ConfigParser.read`` silently skips missing files).
    """
    config = configparser.ConfigParser()
    config.read('settings.ini')
    device_output_name = config.get('Audio Settings', 'device_output_name')

    p = pyaudio.PyAudio()
    try:
        for i in range(p.get_device_count()):
            device_info = p.get_device_info_by_index(i)
            # An input-only device can share the configured name fragment;
            # it must not be returned as a playback device.
            if device_info.get('maxOutputChannels', 0) <= 0:
                continue
            if device_output_name in device_info.get('name'):
                return i
    finally:
        # Release PortAudio on every path (match, no match, or error).
        p.terminate()

    # Raised explicitly rather than via `assert False`, which is stripped
    # under `python -O` and would let the function return None silently.
    raise AssertionError("Could not find output device. Check settings.ini file")
def find_input_device_index():
    """Return the index of the input device named in settings.ini.

    Reads ``device_input_name`` from the ``[Audio Settings]`` section of
    ``settings.ini`` (looked up relative to the current working directory)
    and searches the devices of host API 0 for the first one whose name
    contains that text and that reports at least one input channel.

    Returns:
        The global PyAudio device index of the first match, suitable for
        ``PyAudio.open(input_device_index=...)``.

    Raises:
        AssertionError: If no input-capable device matches the configured
            name.
        configparser.Error: If the section or option is missing (this also
            happens when settings.ini cannot be found, because
            ``ConfigParser.read`` silently skips missing files).
    """
    config = configparser.ConfigParser()
    config.read('settings.ini')
    device_input_name = config.get('Audio Settings', 'device_input_name')

    p = pyaudio.PyAudio()
    try:
        info = p.get_host_api_info_by_index(0)
        numdevices = info.get('deviceCount')
        for i in range(numdevices):
            device_info = p.get_device_info_by_host_api_device_index(0, i)
            # An output-only device can share the configured name fragment;
            # it must not be returned as a recording device.
            if device_info.get('maxInputChannels', 0) <= 0:
                continue
            if device_input_name in device_info.get('name'):
                # `i` is only the position inside host API 0. Callers pass
                # the result to PyAudio.open(), which expects the global
                # device index reported in the device info.
                return device_info.get('index', i)
    finally:
        # Release PortAudio on every path (match, no match, or error).
        p.terminate()

    # Raised explicitly rather than via `assert False`, which is stripped
    # under `python -O` and would let the function return None silently.
    raise AssertionError("Could not find input device. Check settings.ini file")
def record_short_audio():
    """Record RECORD_SECONDS of audio from the configured input device.

    The input device is resolved with ``find_input_device_index()``. Audio
    is captured using the module-level FORMAT, CHANNELS, RATE and CHUNK
    settings, then written to disk through ``save_audio_frames()``.

    The stream and the PyAudio instance are released even if opening the
    stream or reading from it fails.
    """
    input_device_index = find_input_device_index()

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True,
                            frames_per_buffer=CHUNK,
                            input_device_index=input_device_index)
        try:
            print("Recording...")
            # Number of whole CHUNK-sized reads that fit in RECORD_SECONDS.
            num_chunks = int(RATE / CHUNK * RECORD_SECONDS)
            frames = [stream.read(CHUNK) for _ in range(num_chunks)]
            print("Finished recording")
        finally:
            stream.stop_stream()
            stream.close()

        # Save while the PyAudio instance is still alive, because
        # save_audio_frames() queries it for the sample width.
        save_audio_frames(audio, frames)
    finally:
        audio.terminate()
def save_audio_frames(audio, frames):
    """Write recorded frames to ``audio.wav`` in the working directory.

    Args:
        audio: Object providing ``get_sample_size(FORMAT)`` (a
            ``pyaudio.PyAudio`` instance in this module); used to obtain the
            sample width in bytes.
        frames: Iterable of ``bytes`` chunks of raw audio, concatenated in
            order.

    The file is written with the module-level CHANNELS and RATE settings.
    """
    WAVE_OUTPUT_FILENAME = "audio.wav"

    # The context manager closes the file even if a header setter or the
    # write itself raises.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

    print(f"Audio saved as {WAVE_OUTPUT_FILENAME}")
def change_playback_speed(audio_file, speed=1.25):
    """Time-stretch an audio file in place and return its path.

    Args:
        audio_file: Path of the audio file to load; the stretched result is
            written back to this same path.
        speed: Stretch rate passed to ``librosa.effects.time_stretch``
            (defaults to 1.25).

    Returns:
        The ``audio_file`` path that was passed in.
    """
    # sr=None keeps the file's own sample rate instead of resampling.
    samples, sample_rate = librosa.load(audio_file, sr=None)

    # Time-stretch changes the duration without changing the pitch.
    stretched = librosa.effects.time_stretch(samples, rate=speed)

    # Overwrite the source file with the stretched signal.
    sf.write(audio_file, stretched, sample_rate)
    return audio_file
def play_audio(filename):
    """Play a WAV file through PyAudio, blocking until playback finishes.

    The output stream is opened with the sample width, channel count and
    frame rate read from the file. No output device index is given, so
    PyAudio picks the device.

    Args:
        filename: Path of the WAV file to play.

    The wave file, the stream and the PyAudio instance are all released
    even if opening the stream or writing to it fails.
    """
    frames_per_read = 1024

    # The context manager closes the wave reader on every path.
    with wave.open(filename, 'rb') as wf:
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            try:
                # readframes() returns empty bytes once the file is exhausted.
                data = wf.readframes(frames_per_read)
                while data:
                    stream.write(data)
                    data = wf.readframes(frames_per_read)
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()
# When run as a script, list the available input and output devices.
if __name__=='__main__':
    print_input_devices()
    print_output_devices()
    # record_short_audio()