preprocess_crepe.py
import sys
import os

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import numpy as np
import librosa
import torch
import crepe
import argparse
from tqdm import tqdm


def compute_f0(filename, save, device):
    # Load audio at the 16 kHz sample rate expected by CREPE
    audio, sr = librosa.load(filename, sr=16000)
    assert sr == 16000
    # Convert to a (1, time) torch tensor
    audio = torch.tensor(np.copy(audio))[None]
    # Add a tiny amount of noise so fully silent segments do not destabilize the model
    audio = audio + torch.randn_like(audio) * 0.001
    # Here we'll use a 10 millisecond hop length
    hop_length = 160
    # Provide a sensible frequency range for your domain (upper limit is 2006 Hz)
    # This would be a reasonable range for speech
    fmin = 50
    fmax = 1000
    # Select a model capacity -- one of "tiny" or "full"
    model = "full"
    # Pick a batch size that doesn't cause memory errors on your GPU
    batch_size = 512
    # Compute pitch and periodicity on the selected device
    pitch, periodicity = crepe.predict(
        audio,
        sr,
        hop_length,
        fmin,
        fmax,
        model,
        batch_size=batch_size,
        device=device,
        return_periodicity=True,
    )
    # CREPE was not trained on silent audio, so smooth the outputs and
    # zero out frames whose low periodicity indicates silence or noise
    periodicity = crepe.filter.median(periodicity, 7)
    pitch = crepe.filter.mean(pitch, 5)
    pitch[periodicity < 0.5] = 0
    pitch = pitch.squeeze(0)
    np.save(save, pitch, allow_pickle=False)
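
# The saved pitch is a 1-D array with one F0 value in Hz per 10 ms frame (0 for
# frames judged unvoiced). np.save appends a ".npy" suffix, so a file saved as
# "xxx.pit" is loaded back with, for example (hypothetical path):
#     f0 = np.load("data_svc/pitch/speaker0/000001.pit.npy")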

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-w", "--wav", help="wav", dest="wav", required=True)
    parser.add_argument("-p", "--pit", help="pit", dest="pit", required=True)
    args = parser.parse_args()
    print(args.wav)
    print(args.pit)

    os.makedirs(args.pit, exist_ok=True)
    wavPath = args.wav
    pitPath = args.pit

    device = "cuda" if torch.cuda.is_available() else "cpu"
    for spks in os.listdir(wavPath):
        if os.path.isdir(f"./{wavPath}/{spks}"):
            os.makedirs(f"./{pitPath}/{spks}", exist_ok=True)
            files = [f for f in os.listdir(f"./{wavPath}/{spks}") if f.endswith(".wav")]
            for file in tqdm(files, desc=f'Processing crepe {spks}'):
                file = file[:-4]
                compute_f0(f"{wavPath}/{spks}/{file}.wav", f"{pitPath}/{spks}/{file}.pit", device)