Skip to content

Commit

Permalink
Merge pull request #36 from syntpump/cyk_upgrade
Browse files Browse the repository at this point in the history
CYK remake + agreement error checking
  • Loading branch information
iasx authored May 31, 2019
2 parents 4030a74 + b480897 commit 33bfd2d
Showing 1 changed file with 133 additions and 70 deletions.
203 changes: 133 additions & 70 deletions pysyntext/libs/cykalgo.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self, ctx, collection):
rule["prod"] = tuple(rule["prod"])
self.grammar.append(rule)

def wfst(self, sentence, agreement=False):
def wfst(self, sentence):
"""Create and complete a Well-Formed Substring Table
(2-dimensional list of used by the algorithm).
Expand Down Expand Up @@ -68,80 +68,110 @@ def isAppliable(rule):

wfst[i][i + 1].append({
"pos": token,
"agr_pos": token,
"children": [None] * 2
})

size += 1

if agreement:

for span in range(2, size):
for start in range(size - span):
end = start + span
for mid in range(start + 1, end):

for left in wfst[start][mid]:
for right in wfst[mid][end]:

for rule in filter(isAppliable, self.grammar):

if ('full_agr' in rule and
('Gender' in left['pos'] and
'Gender' in right['pos']) and
('Number' in left['pos'] and
'Number' in right['pos']) and
((left['pos']['Gender'] !=
right['pos']['Gender']) or
(left['pos']['Number'] !=
right['pos']['Number']))):
continue

if ('num_agr' in rule and
('Number' in left['pos'] and
'Number' in right['pos']) and
(left['pos']['Number'] !=
right['pos']['Number'])):
continue

target_rule = rule

if 'Gender' in left['pos']:
target_rule['Gender'] = left
['pos']['Gender']
elif 'Gender' in right['pos']:
target_rule['Gender'] = right
['pos']['Gender']

if 'Number' in left['pos']:
target_rule['Number'] = left
['pos']['Number']
elif 'Number' in right['pos']:
target_rule['Number'] = right
['pos']['Number']

wfst[start][end].append({
'pos': target_rule,
'children': [left, right]
})

else:
for span in range(2, size):
for start in range(size - span):
end = start + span
for mid in range(start + 1, end):

for left in wfst[start][mid]:
for right in wfst[mid][end]:

for rule in filter(isAppliable, self.grammar):

wfst[start][end].append({
'pos': rule,
'children': [left, right]
})
for span in range(2, size):
for start in range(size - span):
end = start + span
for mid in range(start + 1, end):

for left in wfst[start][mid]:
for right in wfst[mid][end]:

for rule in filter(isAppliable, self.grammar):

agr_rule = rule

if 'Gender' in right['agr_pos']:
agr_rule['Gender'] = right['agr_pos']['Gender']
elif 'Gender' in left['agr_pos']:
agr_rule['Gender'] = left['agr_pos']['Gender']

if 'Number' in right['agr_pos']:
agr_rule['Number'] = right['agr_pos']['Number']
elif 'Number' in left['agr_pos']:
agr_rule['Number'] = left['agr_pos']['Number']

if ('full_agr' in rule and
('Gender' in left['agr_pos'] and
'Gender' in right['agr_pos']) and
('Number' in left['agr_pos'] and
'Number' in right['agr_pos']) and
((left['agr_pos']['Gender'] !=
right['agr_pos']['Gender']) or
(left['agr_pos']['Number'] !=
right['agr_pos']['Number']))):
agr_rule = dict()

if ('num_agr' in rule and
('Number' in left['agr_pos'] and
'Number' in right['agr_pos']) and
(left['agr_pos']['Number'] !=
right['agr_pos']['Number'])):
agr_rule = dict()

wfst[start][end].append({
'pos': rule,
'agr_pos': agr_rule,
'children': [left, right]
})

return wfst

def findErrors(self, wfst):
    """Search a completed WFST for a likely agreement error.

    The parse stored first in the top cell (``wfst[0][len(wfst) - 1][0]``)
    is inspected. Nothing is reported when the top cell is empty, when the
    root's agreement-aware entry (``'agr_pos'``) already carries a
    ``'upos'``, or when the root's plain entry (``'pos'``) has no ``'upos'``.

    Otherwise the tree under the root is walked breadth-first and the walk
    stops at the first node whose ``'agr_pos'`` carries the same ``'upos'``
    as its ``'pos'``.

    Args:
        wfst (list): Table whose cells ``wfst[i][j]`` are lists of node
            dicts with the keys ``'pos'``, ``'agr_pos'`` and ``'children'``
            (a two-element list whose items are nodes or ``None``).

    Returns:
        tuple: ``(k, k + 1)`` where ``k`` is the zero-based breadth-first
            position of the node the walk stopped at (the root is 0).
        None: No errors were detected.
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    # An empty table has no top cell to inspect.
    if not wfst:
        return None

    top_cell = wfst[0][len(wfst) - 1]
    if not top_cell:
        return None

    root = top_cell[0]
    if 'upos' in root['agr_pos'] or 'upos' not in root['pos']:
        return None

    # deque gives O(1) pops from the left; list.pop(0) shifts every element.
    queue = deque([root])
    index = 0

    while queue:
        node = queue.popleft()
        plain = node['pos']
        agreed = node['agr_pos']

        if ('upos' in agreed and 'upos' in plain
                and plain['upos'] == agreed['upos']):
            return (index, index + 1)

        index += 1

        # Only the first two child slots are used; None marks a leaf slot.
        queue.extend(child for child in node['children'][:2] if child)

    return None

def display(self, wfst):
"""Print the given WFST.
Expand All @@ -150,23 +180,50 @@ def display(self, wfst):
"""

print('\nWFST ' + ' '.join(
print('\nNAGR ' + ' '.join(
[("%-4d" % i)
for i
in range(1, len(wfst))])
)

for i in range(len(wfst) - 1):
print("%d " % i, end='')
for j in range(1, len(wfst)):
print(
"%-5s" % (
wfst[i][j][0]['pos']['upos']
if wfst[i][j]
if wfst[i][j] and 'upos' in wfst[i][j][0]['pos']
else '.'),
end=''
)
print()

print ()

print('\nWAGR ' + ' '.join(
[("%-4d" % i)
for i
in range(1, len(wfst))])
)

for i in range(len(wfst) - 1):
print("%d " % i, end='')
for j in range(1, len(wfst)):
print(
"%-5s" % (
wfst[i][j][0]['agr_pos']['upos']
if wfst[i][j] and 'upos' in wfst[i][j][0]['agr_pos']
else '.'),
end=''
)
print()

possible_errors = self.findErrors(wfst)

if possible_errors:
print()
print("Possible_errors: ", possible_errors)

def treefy(self, wfst):
"""Get the syntax tree from completed WFST
Expand Down Expand Up @@ -202,13 +259,19 @@ def treefy(self, wfst):
if node:
if 'word' in node['pos']:
tree.append({'id': index,
'word': node['pos']['word'],
'tag': 'T',
'word': node['pos']['word'],
'morph': node['pos']})

else:
tree.append({'id': index,
'tag': node['pos']['upos'],
'Gender': (node['pos']['Gender'] if
'Gender' in node['pos'] else
None),
'Number': (node['pos']['Number'] if
'Number' in node['pos'] else
None),
'linksTo': [2 * link_index + 1,
2 * link_index + 2]})
link_index += 1
Expand Down

0 comments on commit 33bfd2d

Please sign in to comment.