diff --git a/pysyntext/libs/cykalgo.py b/pysyntext/libs/cykalgo.py index aa33a78..5af0c48 100644 --- a/pysyntext/libs/cykalgo.py +++ b/pysyntext/libs/cykalgo.py @@ -23,7 +23,7 @@ def __init__(self, ctx, collection): rule["prod"] = tuple(rule["prod"]) self.grammar.append(rule) - def wfst(self, sentence, agreement=False): + def wfst(self, sentence): """Create and complete a Well-Formed Substring Table (2-dimensional list of used by the algorithm). @@ -68,80 +68,110 @@ def isAppliable(rule): wfst[i][i + 1].append({ "pos": token, + "agr_pos": token, "children": [None] * 2 }) size += 1 - if agreement: - - for span in range(2, size): - for start in range(size - span): - end = start + span - for mid in range(start + 1, end): - - for left in wfst[start][mid]: - for right in wfst[mid][end]: - - for rule in filter(isAppliable, self.grammar): - - if ('full_agr' in rule and - ('Gender' in left['pos'] and - 'Gender' in right['pos']) and - ('Number' in left['pos'] and - 'Number' in right['pos']) and - ((left['pos']['Gender'] != - right['pos']['Gender']) or - (left['pos']['Number'] != - right['pos']['Number']))): - continue - - if ('num_agr' in rule and - ('Number' in left['pos'] and - 'Number' in right['pos']) and - (left['pos']['Number'] != - right['pos']['Number'])): - continue - - target_rule = rule - - if 'Gender' in left['pos']: - target_rule['Gender'] = left - ['pos']['Gender'] - elif 'Gender' in right['pos']: - target_rule['Gender'] = right - ['pos']['Gender'] - - if 'Number' in left['pos']: - target_rule['Number'] = left - ['pos']['Number'] - elif 'Number' in right['pos']: - target_rule['Number'] = right - ['pos']['Number'] - - wfst[start][end].append({ - 'pos': target_rule, - 'children': [left, right] - }) - - else: - for span in range(2, size): - for start in range(size - span): - end = start + span - for mid in range(start + 1, end): - - for left in wfst[start][mid]: - for right in wfst[mid][end]: - - for rule in filter(isAppliable, self.grammar): - - wfst[start][end].append({ - 'pos': rule, - 'children': [left, right] - }) + for span in range(2, size): + for start in range(size - span): + end = start + span + for mid in range(start + 1, end): + + for left in wfst[start][mid]: + for right in wfst[mid][end]: + + for rule in filter(isAppliable, self.grammar): + + agr_rule = rule + + if 'Gender' in right['agr_pos']: + agr_rule['Gender'] = right['agr_pos']['Gender'] + elif 'Gender' in left['agr_pos']: + agr_rule['Gender'] = left['agr_pos']['Gender'] + + if 'Number' in right['agr_pos']: + agr_rule['Number'] = right['agr_pos']['Number'] + elif 'Number' in left['agr_pos']: + agr_rule['Number'] = left['agr_pos']['Number'] + + if ('full_agr' in rule and + ('Gender' in left['agr_pos'] and + 'Gender' in right['agr_pos']) and + ('Number' in left['agr_pos'] and + 'Number' in right['agr_pos']) and + ((left['agr_pos']['Gender'] != + right['agr_pos']['Gender']) or + (left['agr_pos']['Number'] != + right['agr_pos']['Number']))): + agr_rule = dict() + + if ('num_agr' in rule and + ('Number' in left['agr_pos'] and + 'Number' in right['agr_pos']) and + (left['agr_pos']['Number'] != + right['agr_pos']['Number'])): + agr_rule = dict() + + wfst[start][end].append({ + 'pos': rule, + 'agr_pos': agr_rule, + 'children': [left, right] + }) return wfst + def findErrors(self, wfst): + """Search for some of agreement errors. + + Args: + wfst (list) + + Returns: + tuple: Indexes of problematic elements. + None: No errors were detected. + + """ + + if len(wfst[0][len(wfst) - 1]) < 1: + return + + if 'upos' in wfst[0][len(wfst) - 1][0]['agr_pos']: + return + + if 'upos' not in wfst[0][len(wfst) - 1][0]['pos']: + return + + buf = [wfst[0][len(wfst) - 1][0]] + + count = 1 + nextCount = 0 + + index = 0 + + while count > 0: + + node = buf.pop(0) + + if node: + if 'upos' in node['agr_pos']: + if node['pos']['upos'] == node['agr_pos']['upos']: + error_indexes = (index, index + 1) + return error_indexes + + count -= 1 + index += 1 + + for i in [0, 1]: + if node['children'][i]: + buf.append(node['children'][i]) + nextCount += 1 + + if count == 0: + count = nextCount + nextCount = 0 + def display(self, wfst): """Print the given WFST. @@ -150,23 +180,50 @@ def display(self, wfst): """ - print('\nWFST ' + ' '.join( + print('\nNAGR ' + ' '.join( [("%-4d" % i) for i in range(1, len(wfst))]) ) + for i in range(len(wfst) - 1): print("%d " % i, end='') for j in range(1, len(wfst)): print( "%-5s" % ( wfst[i][j][0]['pos']['upos'] - if wfst[i][j] + if wfst[i][j] and 'upos' in wfst[i][j][0]['pos'] else '.'), end='' ) print() + print () + + print('\nWAGR ' + ' '.join( + [("%-4d" % i) + for i + in range(1, len(wfst))]) + ) + + for i in range(len(wfst) - 1): + print("%d " % i, end='') + for j in range(1, len(wfst)): + print( + "%-5s" % ( + wfst[i][j][0]['agr_pos']['upos'] + if wfst[i][j] and 'upos' in wfst[i][j][0]['agr_pos'] + else '.'), + end='' + ) + print() + + possible_errors = self.findErrors(wfst) + + if possible_errors: + print() + print("Possible_errors: ", possible_errors) + def treefy(self, wfst): """Get the syntax tree from completed WFST @@ -202,13 +259,19 @@ def treefy(self, wfst): if node: if 'word' in node['pos']: tree.append({'id': index, - 'word': node['pos']['word'], 'tag': 'T', + 'word': node['pos']['word'], 'morph': node['pos']}) else: tree.append({'id': index, 'tag': node['pos']['upos'], + 'Gender': (node['pos']['Gender'] if + 'Gender' in node['pos'] else + None), + 'Number': (node['pos']['Number'] if + 'Number' in node['pos'] else + None), 'linksTo': [2 * link_index + 1, 2 * link_index + 2]}) link_index += 1