-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranscribe.py
47 lines (31 loc) · 1.38 KB
/
transcribe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
import whisper
import pathlib
# from shutil import copyfile
# from genericpath import exists
import natsort
import glob
# Folder holding the audio clips that will be transcribed.
directory = "C:/Users/Gebruiker/Documents/litanie/litanie_code/wavs_split_final"

# Load the Whisper 'large' checkpoint once, before any clip is processed.
model = whisper.load_model('large')

# Keyword arguments forwarded to every model.transcribe() call:
# plain transcription with the language fixed to Dutch.
language_whisper = "dutch"
options = {"language": language_whisper}
transcribe_options = {"task": "transcribe", **options}

# Output file that receives one line per clip. Mode "w" truncates any
# existing file; the handle stays open for the transcription loop below.
metadata = open(
    "C:/Users/Gebruiker/Documents/litanie/dataset/metadata.csv",
    mode="w",
    encoding="utf8",
)
# Build the full path of every .wav file directly inside `directory`.
# os.listdir() returns *all* entries (sub-folders, desktop.ini, ...), so the
# names are filtered by extension (case-insensitively) to keep non-audio
# entries from being handed to the model and aborting the run.
audios = [
    os.path.join(directory, filename)
    for filename in os.listdir(directory)
    if filename.lower().endswith(".wav")
]

# Natural sort so that e.g. "clip2" is processed before "clip10".
sortedwav_files = natsort.natsorted(audios)
# Transcribe each clip in sorted order and append one "<stem>|<text>" line
# per clip to the already-open `metadata` file.
#
# The file object is used as a context manager so it is flushed and closed
# when the loop finishes *or* when a transcription raises. The previous
# trailing `metadata.close` was missing its call parentheses and therefore
# never closed the file.
with metadata:
    for audio in sortedwav_files:
        result = model.transcribe(audio, **transcribe_options)
        # File name without directory or extension identifies the clip.
        filename = pathlib.Path(audio).stem
        metadata.write(f'{filename}|{result["text"]}\n')
        print(f'DONE with {filename}')