-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_regex.py
76 lines (64 loc) · 2.2 KB
/
test_regex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from regex_patterns import *
import collections
from pathlib import Path
# Directory that is later searched for *.par data files.
data = Path('source/patched')

# (set name, pattern collection) pairs. The collections are not defined in
# this file; presumably they arrive through the regex_patterns star import.
re_sets = [
    ('common', common_tc),
    ('hebrew', heb_tc),
    ('greek', greek_tc),
]

# Compiled pattern objects, grouped under their set name.
comps = collections.defaultdict(list)

# make sure all patterns compile without error
for name, patterns in re_sets:
    print(f'compiling {name}')
    for i, pattern in enumerate(patterns):
        try:
            # Only the first element of each entry is compiled.
            compiled = regex.compile(pattern[0])
        except Exception as err:
            # Catch Exception (not a bare except) so Ctrl-C / SystemExit
            # pass through, and chain the cause so the underlying compile
            # error stays visible in the traceback.
            raise Exception(f'Problem in pattern {i} of {name} set: {pattern}') from err
        comps[name].append(compiled)
# Check that every pattern behaves as expected, i.e. that it is able
# to pull at least a few matches out of the data.
# Layout: examples[set name][pattern source] -> [(searched string, matched text), ...]
examples = collections.defaultdict(
    lambda: collections.defaultdict(list)
)


def add_ex(set, pattern, string):
    """Record an example for *pattern* if it matches somewhere in *string*.

    Args:
        set: Name of the pattern set; used as the outer key of ``examples``.
        pattern: Compiled pattern object (needs ``.search`` and ``.pattern``).
        string: Text to search.

    Returns:
        None. On a match, ``(string, matched text)`` is appended to
        ``examples[set][pattern.pattern]``; without a match nothing is
        stored and no key is created.
    """
    found = pattern.search(string)
    if not found:
        return
    hits = examples[set][pattern.pattern]
    hits.append((string, found.group(0)))
print('gathering examples...')

# The file list does not depend on the pattern, so build it once instead of
# re-globbing and re-sorting the directory for every pattern.
par_files = sorted(data.glob('*.par'))

# For every compiled pattern, scan the data files line by line and collect
# examples until more than five have been stored for that pattern.
# `set_name` is used rather than `set` so the builtin is not rebound here.
for set_name, patterns in comps.items():
    print(f'\tgathering examples in set {set_name}')
    for pattern in patterns:
        done = False
        for file in par_files:
            if done:
                break
            # NOTE(review): read_text() decodes with the platform default
            # encoding; pass encoding=... explicitly once the encoding of
            # the .par files is confirmed.
            for line in file.read_text().split('\n'):
                # skip empty lines and reference string lines
                if not line or ref_string.match(line):
                    continue
                try:
                    # every remaining line must be exactly two
                    # tab-separated columns
                    heb_col, grk_col = line.split('\t')
                except ValueError as err:
                    # unpacking a wrong column count raises ValueError;
                    # report the offending file and line
                    raise Exception(file, line) from err
                # "common" patterns see the whole line, the other sets
                # only their own column
                if set_name == "common":
                    add_ex(set_name, pattern, line)
                elif set_name == "hebrew":
                    add_ex(set_name, pattern, heb_col)
                elif set_name == "greek":
                    add_ex(set_name, pattern, grk_col)
                # Indexing the nested defaultdict also creates an empty
                # entry for patterns that have not matched yet.
                if len(examples[set_name][pattern.pattern]) > 5:
                    done = True
                    break
# show all matched patterns
# Prints every collected example per set and pattern, and fails on the first
# pattern whose example list is empty. An empty list can only be present
# because the gathering loop indexes the nested defaultdict for each pattern
# it scans, which creates the entry even when nothing matched.
for set_name, patterns in examples.items():
    print('showing examples')
    print()
    print(f'------ {set_name} set -----')
    print()
    for i, (pattern, exs) in enumerate(patterns.items()):
        print(i, pattern)
        if not exs:
            # the pattern source was printed on the line just above
            raise Exception('pattern has no matches!')
        for ex in exs:
            print(f'\t{ex}')
        print()