Skip to content

Commit

Permalink
experimenting with segmenting lemmas for better persistent accent handling; knocked off a bunch more 3- stems
Browse files Browse the repository at this point in the history
  • Loading branch information
jtauber committed Sep 25, 2019
1 parent cb0e061 commit 2a23d70
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 46 deletions.
44 changes: 31 additions & 13 deletions STEM_DATA/homer_lexicon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1102,12 +1102,14 @@
stems:
1-: ἀνιε{athematic}
2-: ἀνησ
3-: ἀνε{root}/ἀνη
3+: ἀν|ηκ/ἀνε{root}/ἀνεηκ

ἀνίστημι:
ἀν-ίστημι:
stems:
1-: ἀνιστα{athematic}
1+: ἀνιστα{athematic}
3-: ἀν-στα{root}/ἀνα-στα{root}/ἀνστη{root}/ἀνστησ/ἀναστησ
3+: ἀνστη{root}/ἀν|εστη{root}/ἀνεστησ
4+: ἀνεστ{athematic}
stem_overrides:
Expand Down Expand Up @@ -1302,7 +1304,7 @@
ἀπαυράω:
stems:
1+: ἀπηυρα
3-: ἀποερσ
3-: ἀποερσ/ἀπουρ # @@@

ἀπαφίσκω:
stems:
Expand All @@ -1323,6 +1325,7 @@

ἀπεῖπον:
stems:
3-: ἀπειπ{2nd}/ἀποειπ{2nd}
3+: ἀπεειπ{2nd}

ἀπείργαθον:
Expand Down Expand Up @@ -1396,7 +1399,7 @@
ἀποβαίνω:
stems:
2-: ἀποβησ
3-: ἀποβα{root}
3-: ἀποβα{root}/ἀποβησ
3+: ἀπεβη{root}/ἀπεβησ{2nd}

ἀποβλύζω:
Expand Down Expand Up @@ -1440,6 +1443,7 @@

ἀποδρύπτω:
stems:
3-: ἀποδρυψ/ἀποδρυφ{2nd} # @@@
6+: ἀπεδρυφθ

ἀποδύνω:
Expand Down Expand Up @@ -1523,6 +1527,7 @@
ἀποκτείνω:
stems:
1-: ἀποκτειν
3-: ἀποκταν{2nd}/ἀποκτειν/ἀποκτ # @@@
3+: ἀπεκταν{2nd}/ἀπεκτειν/ἀπεκτ # @@@
4-: ἀπεκτ

Expand Down Expand Up @@ -1560,6 +1565,10 @@
3+: ἀπολ{2nd}/ἀπωλ{2nd}/ἀπωλεσ/ἀπολεσσ # @@@
4-: ἀπολωλ

ἀπολούω:
stems:
3-: ἀπολουσ

ἀπολυμαίνομαι:
stems:
1-: ἀπολυμαιν
Expand Down Expand Up @@ -1616,7 +1625,7 @@

ἀπονίναμαι:
stems:
3-: ἀπον
3-: ἀπονη{root}/ἀπονα{root}/ἀπονησ
3+: ἀπονη{root}

ἀπονοστέω:
Expand All @@ -1625,7 +1634,7 @@

ἀποξύνω:
stems:
3-: ἀποξυν
3-: ἀποξῡν

ἀποξύω:
stems:
Expand Down Expand Up @@ -1716,7 +1725,7 @@

ἀπορρίπτω:
stems:
3-: ἀπορριψ
3-: ἀπορρῑψ

ἀποσεύω:
stems:
Expand All @@ -1742,6 +1751,7 @@
ἀποστείχω:
stems:
1-: ἀποστειχ
3-: ἀποστιχ{2nd}
3+: ἀπεστιχ{2nd}

ἀποστίλβω:
Expand Down Expand Up @@ -1769,13 +1779,13 @@

ἀποτίθημι:
stems:
3-: ἀποθ{2nd}/ἀποθε{root}
3-: ἀποθ{2nd}/ἀποθε{root} # @@@

ἀποτίνω:
stems:
1-: ἀποτιν
1+: ἀπετινυ{athematic}
3-: ἀποτισ
3-: ἀποτῑσ
3+: ἀπετισ

ἀποτμήγω:
Expand Down Expand Up @@ -1812,8 +1822,8 @@

ἀποφθίνω:
stems:
3-: ἀποφθι{root}
3+: ἀπεφθι{root}
3-: ἀποφθι{root} # @@@
3+: ἀπεφθι{root} # @@@
5+: ἀπεφθι
6+: ἀπεφθιθ

Expand Down Expand Up @@ -1856,8 +1866,11 @@
ἀραρίσκω:
stems:
1+: ἀραρισκ
3+: ἀραρ{2nd}/ἀρσ/ἠραρ
3-: ἀραρ{2nd}/ἀρσ/ἀρ{root} # @@@
3+: ἀραρ{2nd}/ἀρσ/ἠραρ # @@@
6+: ἀρθ
accents:
- ["AAP", "ἄρσας"]

ἀράσσω:
stems:
Expand All @@ -1872,6 +1885,7 @@
ἀρέσκω:
stems:
2-: ἀρεσσ
3-: ἀρεσ/ἀρεσσ

ἀρετάω:
stems:
Expand Down Expand Up @@ -1927,6 +1941,7 @@
stems:
1-: ἁρπαζ
2-: ἁρπαξ
3-: ἁρπαξ/ἁρπασ
3+: ἡρπαξ/ἡρπασ

ἀρτύνω:
Expand Down Expand Up @@ -2065,6 +2080,7 @@
stems:
1-: αὐ
1+: αὐ
3-: αὐσ/ἀϋσ # ἀῡ̈σ
3+: αὐσ/ἀϋσ/ἠϋσ

ἀφαιρέω:
Expand All @@ -2090,6 +2106,7 @@
stems:
1+: ἀφιε{athematic}
2-: ἀφησ
3-: ἀφε{root}/ἀφη
3+: ἀφε{root}/ἀφεηκ/ἀφ|ηκ{2nd}

ἀφικάνω:
Expand Down Expand Up @@ -2160,7 +2177,7 @@
stems:
1-:
2-: ἀσ
3-: ἀσ
3-: ᾱσ
3+: ἀεσ

ἀωτέω:
Expand All @@ -2187,11 +2204,12 @@
4+: ἐβεβηκ/ἐβεβα{root}
stem_overrides:
- ["AA[OP]", "βα{root}"]
- ["AAS", "βη{root}/βε{root}"]
- ["AAS", "βη{root}/βε{root}/βησ/βει"] # @@@

βάλλω:
stems:
1-: βαλλ
3-: βαλ{2nd}
3+: βαλ{2nd}/ἐβαλ{2nd}
4-: βεβληκ
4+: βεβληκ
Expand Down
2 changes: 1 addition & 1 deletion accent.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


def clean(w):
return rebreath(w).replace("|", "")
return rebreath(w).replace("|", "").replace("-", "")


def calculate_accent(w, parse, lemma, segmented_lemma, stem, inflexion,
Expand Down
28 changes: 14 additions & 14 deletions homer-data/verbs.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@
αἱρέω AMO.3P ἑλοίατο
αἱρέω AMO.3S ἕλοιτο
αἱρέω AMS.1S ἕλωμαι
αἱρέω AMS.2S αἱρήσεαι # considered AMS.2S not FMI.2S??
αἱρέω AMS.2S αἱρήσεαι
αἱρέω AMS.3P ἕλωνται
αἱρέω AMS.3S ἕληται
αἱρέω FAI.1P αἱρήσομεν
Expand Down Expand Up @@ -440,10 +440,10 @@
ἀίσσω AAI.3S ἤϊξε
ἀίσσω AAI.3S ἤϊξεν
ἀίσσω AAP.ASM ἀΐξαντα
# ἀίσσω AAP.NDM ἀίξαντε # missing diaerisis?
# ἀίσσω AAP.NDM ἀίξαντε # missing diaeresis?
ἀίσσω AAP.NDM ἀΐξαντε
ἀίσσω AAP.NPF ἀΐξασαι
# ἀίσσω AAP.NSF ἀίξασα # missing diaerisis?
# ἀίσσω AAP.NSF ἀίξασα # missing diaeresis?
ἀίσσω AAP.NSF ἀΐξασα
ἀίσσω AAP.NSM ἀΐξας
ἀίσσω AAS.3P ἀΐξωσι
Expand Down Expand Up @@ -1167,10 +1167,10 @@
ἀνέχω AAN ἀνασχεῖν
ἀνέχω AAN ἀνασχέμεν
ἀνέχω AAN ἀνσχεθέειν
ἀνέχω AAP.NPM ἀνίσχοντες
# ἀνέχω AAP.NPM ἀνίσχοντες # this is PAP.NPM
ἀνέχω AAP.NSM ἀνασχών
ἀνέχω AMD.2S ἀνάσχεο
ἀνέχω AMD.2S ἄνσχεο
# ἀνέχω AMD.2S ἀνάσχεο # accentuation seems weird
# ἀνέχω AMD.2S ἄνσχεο # accentuation seems weird
ἀνέχω AMI.2S ἄνσχεο
ἀνέχω AMI.3S ἀνέσχετο
ἀνέχω AMN ἀνασχέσθαι
Expand Down Expand Up @@ -1343,7 +1343,7 @@
ἄνωγα YAI.3S ἠνώγει
ἄνωγα YAI.3S ἠνώγειν
ἀνώγω YAI.3S ἠνώγειν
ἀνωθέω AAP.NPM ἀνώσαντες
# ἀνωθέω AAP.NPM ἀνώσαντες # why is this not accented ἀνωσάντες? @@@
ἀοιδιάω PAI.3S ἀοιδιάει
ἀολλίζω AAI.3P ἀόλλισσαν
ἀολλίζω AAP.NSF ἀολλίσσασα
Expand Down Expand Up @@ -1463,10 +1463,10 @@
ἀπερωέω AAO.2S ἀπερωήσειας
ἀπεχθαίρω AAS.1S ἀπεχθήρω
ἀπεχθαίρω PAI.3S ἀπεχθαίρει
# ἀπεχθάνομαι AAI.3S ἀπήχθετο # not AAI.3S
ἀπεχθάνομαι AAP.DPM ἀπεχθομένοισι
ἀπεχθάνομαι AAP.NSM ἀπεχθόμενος
ἀπεχθάνομαι AAS.2S ἀπέχθηαι
# ἀπεχθάνομαι AAI.3S ἀπήχθετο # not AAI
# ἀπεχθάνομαι AAP.DPM ἀπεχθομένοισι # not AAP
# ἀπεχθάνομαι AAP.NSM ἀπεχθόμενος # not AAP
# ἀπεχθάνομαι AAS.2S ἀπέχθηαι # not AAS?
ἀπεχθάνομαι AMI.3S ἀπήχθετο
ἀπεχθάνομαι AMN ἀπεχθέσθαι
ἀπεχθάνομαι IMI.3S ἀπήχθετο
Expand Down Expand Up @@ -1730,7 +1730,7 @@
ἀποτέμνω IAI.3S ἀπέταμνε
ἀποτέμνω PMP.ASM ἀποταμνόμενον
ἀποτίθημι AMN ἀποθέσθαι
ἀποτίθημι AMS.1S ἀποθείομαι
# ἀποτίθημι AMS.1S ἀποθείομαι # not sure how to think about this @@@
ἀποτίνω AAI.3P ἀπέτισαν
ἀποτίνω AAI.3S ἀπέτισεν
ἀποτίνω AAN ἀποτῖσαι
Expand Down Expand Up @@ -1834,7 +1834,7 @@
ἀραρίσκω AAI.3S ἄρσε
ἀραρίσκω AAI.3S ἤραρε
ἀραρίσκω AAI.3S ἤραρεν
ἀραρίσκω AAP.NDM ἀραρόντε
# ἀραρίσκω AAP.NDM ἀραρόντε # @@@ until we can handle multiple accent overrides
ἀραρίσκω AAP.NPM ἄρσαντες
ἀραρίσκω AAP.NSM ἀραρών
ἀραρίσκω AAP.NSM ἄρσας
Expand Down Expand Up @@ -2047,7 +2047,7 @@
αὔω AAI.3S ἄϋσεν
αὔω AAI.3S ἤϋσε
αὔω AAI.3S ἤϋσεν
αὔω AAN ἀῦσαι
# αὔω AAN ἀῦσαι # missing diaeresis
αὔω AAP.GPM ἀϋσάντων
αὔω AAP.NPM ἀΰσαντες
αὔω AAP.NSM ἀΰσας
Expand Down
7 changes: 3 additions & 4 deletions homer_generate_paradigms.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
c = form.count("/") + 1
stem = ginflexion.find_stems(lemma, key, tags)
generated = ginflexion.generate(lemma, key, tags)
segmented_lemma = ginflexion.segmented_lemmas.get(lemma)
if strip_length(form) in [
strip_length(w) for w in sorted(generated)]:
correct = "✓"
Expand Down Expand Up @@ -83,16 +84,14 @@
if debug or correct == "✕":
if first:
output_item(
lemma, key, key_to_part(key), form, None,
lemma, segmented_lemma, key, key_to_part(key), form, None,
stem, possible_stems, likely_stems, possible_parses,
generated, correct)
# if len(likely_stems) == 1:
# print(lemma, likely_stems[0][0], likely_stems[0][1])
first = False

print()
print("{}/{} incorrect".format(incorrect_count, total_count))
print(len(summary_by_lemma))
print("{}/{} incorrect ({} lemmas)".format(incorrect_count, total_count, len(summary_by_lemma)))

if incorrect_count > 0:
sys.exit(1)
3 changes: 2 additions & 1 deletion morphgnt_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
c = form.count("/") + 1
stem = ginflexion.find_stems(lemma, key, tags)
generated = ginflexion.generate(lemma, key, tags)
segmented_lemma = ginflexion.segmented_lemmas.get(lemma)

if strip_length(form) in [
strip_length(w) for w in sorted(generated)]:
Expand All @@ -96,7 +97,7 @@

if debug or correct == "✕":
output_item(
lemma, key, key_to_part(key), form, None,
lemma, segmented_lemma, key, key_to_part(key), form, None,
stem, possible_stems, likely_stems, possible_parses,
generated, correct)

Expand Down
Loading

0 comments on commit 2a23d70

Please sign in to comment.