Skip to content

Commit

Permalink
factored out some common util code
Browse files Browse the repository at this point in the history
  • Loading branch information
jtauber committed Dec 1, 2016
1 parent 6c7a4a5 commit e0b3614
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 151 deletions.
82 changes: 3 additions & 79 deletions generate_morphgnt_lexicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from accent import strip_length
from greek_inflexion import GreekInflexion
from morphgnt_utils import bcv_tuple, convert_parse, key_to_part


ginflexion = GreekInflexion(
Expand All @@ -15,84 +16,6 @@
)


# @@@ move this to greek-utils

def bcv_tuple(bcv):
    """
    converts a BBCCVV string into a tuple of book, chapter, verse number.

    e.g. "012801" returns (1, 28, 1)

    A real tuple is returned (rather than a one-shot generator) so the
    result can be unpacked with ``b, c, v = bcv_tuple(...)`` and also
    indexed, compared, or iterated more than once.

    Raises ValueError if any of the three two-character fields is not an
    integer.
    """
    return int(bcv[0:2]), int(bcv[2:4]), int(bcv[4:6])


def convert_parse(ccat_parse):
    """
    converts a CCAT verb parse code into the compact key used for lookups.

    The code is read positionally: ``[0]`` person, ``[1:4]``
    tense/voice/mood, ``[4:7]`` case/number/gender (``[5]`` is number).

    * moods D, I, S, O  -> ``TVM.`` + person + number, e.g.
      "3AAI-S--" returns "AAI.3S"
    * mood P (participle) -> ``TVM.`` + case/number/gender, e.g.
      "-AAPGSM-" returns "AAP.GSM"
    * mood N (infinitive) -> ``TVM`` alone, e.g. "-PAN----" returns "PAN"

    A passive voice is then rewritten as middle unless the tense is A or
    F, e.g. "3PPI-S--" returns "PMI.3S" while "3API-S--" stays "API.3S".

    Raises ValueError for any other mood character (previously this fell
    through to an UnboundLocalError on ``result``).
    """
    tvm = ccat_parse[1:4]
    mood = ccat_parse[3]

    if mood in "DISO":
        result = tvm + "." + ccat_parse[0] + ccat_parse[5]
    elif mood == "P":
        result = tvm + "." + ccat_parse[4:7]
    elif mood == "N":
        result = tvm
    else:
        raise ValueError(
            "unsupported mood {!r} in parse code {!r}".format(
                mood, ccat_parse))

    # only tenses A and F keep a distinct passive key; elsewhere the
    # passive is folded into the middle
    if result[1] == "P" and result[0] not in "AF":
        result = result[0] + "M" + result[2:]
    return result


# Maps a principal-part label to the tense/voice/mood (TVM) codes built on
# that part.
# NOTE(review): the digit is presumably the principal part number and the
# "+" / "-" suffix presumably marks augmented vs. unaugmented forms (the
# "+" groups hold only indicative I/A/Y codes) -- confirm.
PARTS = {
    "1-": [
        "PAD", "PAI", "PAN", "PAP", "PAS", "PAO",
        "PMD", "PMI", "PMN", "PMP", "PMS",
    ],
    "1+": [
        "IAI",
        "IMI",
    ],
    "2-": [
        "FAI", "FAN", "FAP",
        "FMI",
    ],
    "3-": [
        "AAD", "AAN", "AAP", "AAS", "AAO",
        "AMD", "AMN", "AMP", "AMS", "AMO",
    ],
    "3+": [
        "AAI",
        "AMI",
    ],
    "4-": [
        "XAI", "XAN", "XAP", "XAS",
    ],
    "4+": [
        "YAI",
    ],
    "5-": [
        "XMD", "XMI", "XMN", "XMP",
    ],
    "5+": [
        "YMI",
    ],
    "6-": [
        "APD", "APN", "APP", "APS", "APO",
    ],
    "6+": [
        "API",
    ],
    "7-": [
        "FPI", "FPP",
    ],
}

# Inverse lookup: TVM code -> principal-part label. Built with a
# comprehension so no loop variables are left behind at module level.
REVERSE_PARTS = {
    tvm: part
    for part, tvm_list in PARTS.items()
    for tvm in tvm_list
}


def pp(key):
    """
    returns the principal-part label for a parse key.

    Only the first three characters of `key` take part in the lookup in
    REVERSE_PARTS; a prefix that is not in the table raises KeyError.
    """
    tvm = key[:3]
    return REVERSE_PARTS[tvm]


# lemma -> principal-part label -> set of frozenset(stem_guess) values,
# filled in by the loop below when `correct` is "✕" and a stem guess exists
STEM_GUESSES = defaultdict(lambda: defaultdict(set))

for book_num in [4, 23, 24, 25]:
Expand Down Expand Up @@ -123,7 +46,8 @@ def pp(key):
correct = "✕"
if correct == "✕":
if stem_guess:
STEM_GUESSES[lemma][pp(key)].add(frozenset(stem_guess))
STEM_GUESSES[lemma][key_to_part(key)].add(
frozenset(stem_guess))


for lemma, parts in sorted(STEM_GUESSES.items()):
Expand Down
50 changes: 2 additions & 48 deletions make_morphgnt_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,51 +4,10 @@

from pysblgnt import morphgnt_rows

BOOK_NUM = 4


# @@@ move this to greek-utils

def bcv_tuple(bcv):
"""
converts a BBCCVV string into a tuple of book, chapter, verse number.
from morphgnt_utils import bcv_tuple, convert_parse

e.g. "012801" returns (1, 28, 1)
"""
return (int(i) for i in [bcv[0:2], bcv[2:4], bcv[4:6]])


# Known (CCAT parse code, Robinson parse code) pairs. The verb-collection
# loop further down asserts that every (row["ccat-parse"], row["robinson"])
# pair it meets is listed here.
# Note that '3AMI-S--' appears twice, paired with two different Robinson
# codes.
PARSE_CODES = [
('-AAPGSM-', 'V-AAP-GSM'),
('-PAPNPF-', 'V-PAP-NPF'),
('-PMPNPF-', 'V-PNP-NPF'),
('-XAPNPM-', 'V-RAP-NPM'),
('-XPPASN-', 'V-RPP-ASN'),
('2AAD-P--', 'V-AAM-2P'),
('2PAD-P--', 'V-PAM-2P'),
('2XAI-S--', 'V-RAI-2S'),
('3AAI-P--', 'V-AAI-3P'),
('3AAI-S--', 'V-AAI-3S'),
('3AMI-S--', 'V-2ADI-3S'), # @@@
('3AMI-S--', 'V-ADI-3S'),
('3API-S--', 'V-API-3S'),
('3APS-P--', 'V-APS-3P'),
('3IAI-P--', 'V-IAI-3P'),
('3IAI-S--', 'V-IAI-3S'),
('3PAI-P--', 'V-PAI-3P'),
('3PAI-S--', 'V-PAI-3S'),
('3PAS-S--', 'V-PAS-3S'),
('3YAI-P--', 'V-LAI-3P'),
('3YAI-S--', 'V-LAI-3S'),
]


def convert_parse(ccat_parse):
    """
    converts a CCAT verb parse code into a compact ``TVM.xxx`` key.

    The code is read positionally: ``[0]`` person, ``[1:4]``
    tense/voice/mood, ``[4:7]`` case/number/gender (``[5]`` is number).

    * moods D, I, S, O  -> ``TVM.`` + person + number, e.g.
      "3AAI-S--" returns "AAI.3S"
    * mood P (participle) -> ``TVM.`` + case/number/gender, e.g.
      "-AAPGSM-" returns "AAP.GSM"

    Raises ValueError for any other mood character (previously this fell
    through to an UnboundLocalError on ``result``).
    """
    tvm = ccat_parse[1:4]
    mood = ccat_parse[3]

    if mood in "DISO":
        return tvm + "." + ccat_parse[0] + ccat_parse[5]
    if mood == "P":
        return tvm + "." + ccat_parse[4:7]
    raise ValueError(
        "unsupported mood {!r} in parse code {!r}".format(mood, ccat_parse))
BOOK_NUM = 4


# lemma -> convert_parse(row["ccat-parse"]) key -> set of row["norm"] values
VERBS = defaultdict(lambda: defaultdict(set))
Expand All @@ -57,11 +16,6 @@ def convert_parse(ccat_parse):
b, c, v = bcv_tuple(row["bcv"])
if c == 2 and v <= 11:
if row["ccat-pos"] == "V-":
assert (
row["ccat-parse"], row["robinson"]
) in PARSE_CODES, (
row["ccat-parse"], row["robinson"]
)
VERBS[row["lemma"]][convert_parse(row["ccat-parse"])].add(
row["norm"])

Expand Down
25 changes: 1 addition & 24 deletions morphgnt_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,13 @@

from accent import strip_length # , rebreath
from greek_inflexion import GreekInflexion

from morphgnt_utils import bcv_tuple, convert_parse

# NOTE(review): presumably the two arguments are the stemming-rules file and
# the lexicon file -- confirm against GreekInflexion's signature.
ginflexion = GreekInflexion("stemming.yaml", "morphgnt_johannine_lexicon.yaml")

# NOTE(review): usage is outside the visible excerpt; presumably switches on
# extra diagnostic output -- confirm.
debug = False


# @@@ move this to greek-utils

def bcv_tuple(bcv):
    """
    converts a BBCCVV string into a tuple of book, chapter, verse number.

    e.g. "012801" returns (1, 28, 1)

    A real tuple is returned (rather than a one-shot generator) so the
    result can be unpacked with ``b, c, v = bcv_tuple(...)`` and also
    indexed, compared, or iterated more than once.

    Raises ValueError if any of the three two-character fields is not an
    integer.
    """
    return int(bcv[0:2]), int(bcv[2:4]), int(bcv[4:6])


def convert_parse(ccat_parse):
    """
    converts a CCAT verb parse code into the compact key used for lookups.

    The code is read positionally: ``[0]`` person, ``[1:4]``
    tense/voice/mood, ``[4:7]`` case/number/gender (``[5]`` is number).

    * moods D, I, S, O  -> ``TVM.`` + person + number, e.g.
      "3AAI-S--" returns "AAI.3S"
    * mood P (participle) -> ``TVM.`` + case/number/gender, e.g.
      "-AAPGSM-" returns "AAP.GSM"
    * mood N (infinitive) -> ``TVM`` alone, e.g. "-PAN----" returns "PAN"

    A passive voice is then rewritten as middle unless the tense is A or
    F, e.g. "3PPI-S--" returns "PMI.3S" while "3API-S--" stays "API.3S".

    Raises ValueError for any other mood character (previously this fell
    through to an UnboundLocalError on ``result``).
    """
    tvm = ccat_parse[1:4]
    mood = ccat_parse[3]

    if mood in "DISO":
        result = tvm + "." + ccat_parse[0] + ccat_parse[5]
    elif mood == "P":
        result = tvm + "." + ccat_parse[4:7]
    elif mood == "N":
        result = tvm
    else:
        raise ValueError(
            "unsupported mood {!r} in parse code {!r}".format(
                mood, ccat_parse))

    # only tenses A and F keep a distinct passive key; elsewhere the
    # passive is folded into the middle
    if result[1] == "P" and result[0] not in "AF":
        result = result[0] + "M" + result[2:]
    return result


# Running tallies, both starting at zero.
# NOTE(review): the code that increments them is outside the visible
# excerpt; presumably they count mismatched forms and all forms checked.
incorrect_count = 0
total_count = 0

Expand Down
76 changes: 76 additions & 0 deletions morphgnt_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# @@@ move this to greek-utils

def bcv_tuple(bcv):
    """
    converts a BBCCVV string into a tuple of book, chapter, verse number.

    e.g. "012801" returns (1, 28, 1)

    A real tuple is returned (rather than a one-shot generator) so the
    result can be unpacked with ``b, c, v = bcv_tuple(...)`` and also
    indexed, compared, or iterated more than once.

    Raises ValueError if any of the three two-character fields is not an
    integer.
    """
    return int(bcv[0:2]), int(bcv[2:4]), int(bcv[4:6])


def convert_parse(ccat_parse):
    """
    converts a CCAT verb parse code into the compact key used for lookups.

    The code is read positionally: ``[0]`` person, ``[1:4]``
    tense/voice/mood, ``[4:7]`` case/number/gender (``[5]`` is number).

    * moods D, I, S, O  -> ``TVM.`` + person + number, e.g.
      "3AAI-S--" returns "AAI.3S"
    * mood P (participle) -> ``TVM.`` + case/number/gender, e.g.
      "-AAPGSM-" returns "AAP.GSM"
    * mood N (infinitive) -> ``TVM`` alone, e.g. "-PAN----" returns "PAN"

    A passive voice is then rewritten as middle unless the tense is A or
    F, e.g. "3PPI-S--" returns "PMI.3S" while "3API-S--" stays "API.3S".

    Raises ValueError for any other mood character (previously this fell
    through to an UnboundLocalError on ``result``).
    """
    tvm = ccat_parse[1:4]
    mood = ccat_parse[3]

    if mood in "DISO":
        result = tvm + "." + ccat_parse[0] + ccat_parse[5]
    elif mood == "P":
        result = tvm + "." + ccat_parse[4:7]
    elif mood == "N":
        result = tvm
    else:
        raise ValueError(
            "unsupported mood {!r} in parse code {!r}".format(
                mood, ccat_parse))

    # only tenses A and F keep a distinct passive key; elsewhere the
    # passive is folded into the middle
    if result[1] == "P" and result[0] not in "AF":
        result = result[0] + "M" + result[2:]
    return result


# Maps a principal-part label to the tense/voice/mood (TVM) codes built on
# that part.
# NOTE(review): the digit is presumably the principal part number and the
# "+" / "-" suffix presumably marks augmented vs. unaugmented forms (the
# "+" groups hold only indicative I/A/Y codes) -- confirm.
PARTS = {
    "1-": [
        "PAD", "PAI", "PAN", "PAP", "PAS", "PAO",
        "PMD", "PMI", "PMN", "PMP", "PMS",
    ],
    "1+": [
        "IAI",
        "IMI",
    ],
    "2-": [
        "FAI", "FAN", "FAP",
        "FMI",
    ],
    "3-": [
        "AAD", "AAN", "AAP", "AAS", "AAO",
        "AMD", "AMN", "AMP", "AMS", "AMO",
    ],
    "3+": [
        "AAI",
        "AMI",
    ],
    "4-": [
        "XAI", "XAN", "XAP", "XAS",
    ],
    "4+": [
        "YAI",
    ],
    "5-": [
        "XMD", "XMI", "XMN", "XMP",
    ],
    "5+": [
        "YMI",
    ],
    "6-": [
        "APD", "APN", "APP", "APS", "APO",
    ],
    "6+": [
        "API",
    ],
    "7-": [
        "FPI", "FPP",
    ],
}

# Inverse lookup: TVM code -> principal-part label. Built with a
# comprehension so no loop variables are left behind at module level.
REVERSE_PARTS = {
    tvm: part
    for part, tvm_list in PARTS.items()
    for tvm in tvm_list
}


def key_to_part(key):
    """
    returns the principal-part label for a parse key.

    Only the first three characters of `key` take part in the lookup in
    REVERSE_PARTS; a prefix that is not in the table raises KeyError.
    """
    tvm = key[:3]
    return REVERSE_PARTS[tvm]

0 comments on commit e0b3614

Please sign in to comment.