This repository was archived by the owner on Mar 22, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcreate_arpabet_json.py
72 lines (61 loc) · 2.51 KB
/
create_arpabet_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import argparse
import json
import sys
def _parse_dict_line(line):
    """Split one dictionary line into its tab-separated fields.

    Only the line terminator is removed, so a final line that lacks a
    trailing newline keeps its last character.
    """
    return line.rstrip('\n').split('\t')


def _is_alternate_entry(desc, dic_entry):
    """Tell whether *dic_entry* is a variant entry for *desc*'s text.

    A variant is a dictionary word that equals the description text once
    everything from the first ``(`` onwards is dropped (e.g. ``word(2)``).
    The comparison ignores case on both sides.
    """
    if not desc:
        return False
    return desc['text'].lower() == dic_entry[0].lower().split('(')[0]


def _alternate_desc(desc, dic_entry):
    """Return a copy of *desc* carrying the pronunciation of *dic_entry*."""
    sys.stderr.write('INFO: found another pronunciation for: '
                     '{}\n'.format(desc['text']))
    alternate = desc.copy()
    alternate['arpabet'] = dic_entry[1]
    return alternate


def main(input_f, dict_f, out_f):
    """Attach dictionary pronunciations to JSON-lines descriptions.

    Each non-blank line of ``input_f`` is a JSON object with a ``text``
    key.  ``dict_f`` holds one ``word<TAB>pronunciation`` entry per line.
    The dictionary is consumed in a single forward pass, so its entries
    are expected in the same order as the descriptions.  Every description
    whose text matches a dictionary word (case-insensitively) is written to
    ``out_f`` with an added ``arpabet`` key; variant entries such as
    ``word(2)`` produce an extra copy of the description per variant.
    Descriptions without a pronunciation are reported on stderr and left
    out of the output.

    Args:
        input_f: Path of the input JSON-lines file.
        dict_f: Path of the tab-separated pronunciation dictionary.
        out_f: Path of the JSON-lines file to write.
    """
    dic_read_line = None  # dictionary entry read ahead but not yet matched
    prev_desc = None
    dic_file_ended = False
    arpabet_descs = []

    # Context managers make sure both inputs are closed, even on errors.
    with open(input_f) as desc_i, open(dict_f) as dic:
        for desc_line in desc_i:
            if not desc_line.strip():
                # Tolerate blank lines (e.g. a trailing one) in the input.
                continue
            desc_ld = json.loads(desc_line)
            text = desc_ld['text'].lower()

            # The entry that stopped the previous search may belong here.
            if dic_read_line and dic_read_line[0].lower() == text:
                desc_ld['arpabet'] = dic_read_line[1]
                dic_read_line = None

            while 'arpabet' not in desc_ld:
                line = dic.readline()
                if line == '':
                    sys.stderr.write('WARNING: dictionary file ended while'
                                     ' still looking for: {}\n'.format(text))
                    dic_file_ended = True
                    break
                dic_read_line = _parse_dict_line(line)
                if dic_read_line[0].lower() == text:
                    desc_ld['arpabet'] = dic_read_line[1]
                    dic_read_line = None
                elif _is_alternate_entry(prev_desc, dic_read_line):
                    arpabet_descs.append(
                        _alternate_desc(prev_desc, dic_read_line))
                else:
                    # Unrelated entry: keep it for the next description.
                    break

            if 'arpabet' in desc_ld:
                arpabet_descs.append(desc_ld)
            else:
                sys.stderr.write(
                    "WARNING: couldn't find pronunciation for: {}\n"
                    .format(text))
            prev_desc = desc_ld
            if dic_file_ended:
                sys.stderr.write('WARNING: dictionary find ended sooner\n')
                break

        # Variants of the last description follow it in the dictionary and
        # would otherwise never be read, since entries are only consumed
        # while searching for a *following* description.
        if prev_desc and 'arpabet' in prev_desc and dic_read_line is None:
            while True:
                line = dic.readline()
                if line == '':
                    break
                entry = _parse_dict_line(line)
                if not _is_alternate_entry(prev_desc, entry):
                    break
                arpabet_descs.append(_alternate_desc(prev_desc, entry))

    with open(out_f, 'w') as out:
        out.writelines(json.dumps(desc) + '\n' for desc in arpabet_descs)
if __name__ == '__main__':
    # Command-line entry point: three positional paths, handed to main()
    # in the order input, dictionary, output.
    positional_args = (
        ('input_desc', 'Input json line file'),
        ('dict', 'Arpabet translation file of input desc json'),
        ('output_desc', 'Output json line file'),
    )
    cli = argparse.ArgumentParser()
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, type=str, help=arg_help)
    options = cli.parse_args()
    main(options.input_desc, options.dict, options.output_desc)