-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathASVRawDataset.py
68 lines (56 loc) · 2.35 KB
/
ASVRawDataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from pathlib import Path

import librosa
import numpy as np
import torch.utils.data as data
class ASVRawDataset(data.Dataset):
    """Raw-waveform dataset driven by an ASVspoof2019 LA protocol file.

    Every non-blank protocol line names one utterance. The utterance is
    resolved to ``<root>/<partition folder>/flac/<file_name>.flac`` and, when
    indexed, returned as a fixed-length waveform plus its metadata.

    Args:
        root: Directory holding the protocol file and the partition folders.
            May be a ``str`` or a ``pathlib.Path``.
        partition: One of ``'train'``, ``'dev'`` or ``'eval'``.
        protocol_name: Path of the protocol file, relative to ``root``.
        is_rand: If true, waveforms at least as long as the fixed length are
            cropped at a random offset instead of from the beginning.

    Raises:
        ValueError: If ``partition`` is not one of the known partitions.
    """

    # Sampling rate requested from librosa when loading audio.
    SAMPLE_RATE = 16000
    # Duration, in seconds, of every waveform returned by ``load_feature``.
    CLIP_SECONDS = 4
    # Partition name -> folder (under ``root``) that contains ``flac/``.
    PARTITION_DIRS = {
        'train': 'ASVspoof2019_LA_train',
        'dev': 'ASVspoof2019_LA_dev',
        'eval': 'ASVspoof2019_LA_eval',
    }

    def __init__(self, root, partition, protocol_name, is_rand=False):
        super().__init__()

        # Fail early with a clear message instead of hitting an unbound
        # local variable while parsing the first protocol line.
        if partition not in self.PARTITION_DIRS:
            raise ValueError(
                'Unknown partition {!r}; expected one of {}'.format(
                    partition, sorted(self.PARTITION_DIRS)
                )
            )
        feature_address = self.PARTITION_DIRS[partition]

        # Accept plain strings as well as Path objects.
        self.root = Path(root)
        self.partition = partition
        self.is_rand = is_rand
        self.sysid_dict = {
            'bonafide': 1,  # bonafide speech
            'spoof': 0,  # Spoofed signal
        }

        protocol_dir = self.root.joinpath(protocol_name)
        print('Reading ', protocol_dir)
        if self.is_rand:
            print('Using randomly select sequence')

        flac_dir = self.root.joinpath(feature_address, 'flac')
        self.features = []
        # The context manager guarantees the protocol file is closed.
        with open(protocol_dir) as protocol_file:
            for protocol_line in protocol_file:
                # split() with no argument tolerates tabs / repeated spaces.
                tokens = protocol_line.split()
                if not tokens:
                    continue  # blank line
                # The protocols look like this:
                #  [0]      [1]           [2]  [3]        [4]
                #  LA_0070  LA_D_7622198  -    -          bonafide
                #  -        file_name     -    attack_id  sys_id
                file_name = tokens[1]
                attack_id = tokens[3]
                sys_id = self.sysid_dict[tokens[4]]
                feature_path = flac_dir.joinpath(file_name + '.flac')
                self.features.append(
                    (feature_path, file_name, attack_id, sys_id)
                )

    @staticmethod
    def _fit_length(feature, fix_len, is_rand):
        """Return exactly ``fix_len`` leading-axis samples of ``feature``.

        Shorter inputs are repeated end-to-end and truncated; longer (or
        equal) inputs are cropped from a random offset when ``is_rand`` is
        true, otherwise from the start.

        Raises:
            ValueError: If ``feature`` has no samples (repeating it could
                never reach ``fix_len``).
        """
        total_length = feature.shape[0]
        if total_length == 0:
            raise ValueError('empty waveform cannot be fitted to a fixed length')

        if total_length < fix_len:
            # Ceiling division: number of copies needed to reach fix_len.
            repeats = -(-fix_len // total_length)
            return np.concatenate([feature] * repeats)[:fix_len]

        if is_rand:
            start = np.random.randint(0, total_length - fix_len + 1)
            return feature[start:start + fix_len]
        return feature[:fix_len]

    def load_feature(self, feature_path):
        """Load one audio file and return it as a fixed-length waveform.

        The file is loaded with ``librosa.load`` at ``SAMPLE_RATE`` and then
        fitted to ``SAMPLE_RATE * CLIP_SECONDS`` samples.

        Raises:
            ValueError: If the loaded waveform is empty.
        """
        feature, sr = librosa.load(feature_path, sr=self.SAMPLE_RATE)
        try:
            return self._fit_length(feature, sr * self.CLIP_SECONDS, self.is_rand)
        except ValueError as err:
            # Name the offending file so it can be located in the corpus.
            raise ValueError('{}: {}'.format(feature_path, err)) from err

    def __getitem__(self, index):
        """Return ``(waveform, file_name, attack_id, sys_id)`` for ``index``."""
        feature_path, file_name, attack_id, sys_id = self.features[index]
        feature = self.load_feature(feature_path)
        return feature, file_name, attack_id, sys_id

    def __len__(self):
        """Return the number of utterances listed in the protocol."""
        return len(self.features)