From 24c7fe96316f4238cf89d2e64a9428103d44e8da Mon Sep 17 00:00:00 2001 From: "jakob.steixner" Date: Wed, 7 Oct 2020 12:41:04 +0200 Subject: [PATCH 1/3] fix: conjugation and lemmatization of particle verbs --- pattern/text/en/inflect.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/pattern/text/en/inflect.py b/pattern/text/en/inflect.py index e4d81469..23738fe9 100644 --- a/pattern/text/en/inflect.py +++ b/pattern/text/en/inflect.py @@ -655,10 +655,37 @@ def __init__(self): 29: 32, 30: 32, 31: 32, 32: 33 # past plural negated }) + def decompose_particle_verb(self, verb): + try: + if len(verb.split()) > 1: + verb, sattelites = verb.split(' ', 1) + else: + sattelites = '' + except Exception as e: + sattelites = '' + + return verb, sattelites + + def tenses(self, verb, parse=True): + if parse: + verb, _ = self.decompose_particle_verb(verb) + return _Verbs.tenses(self, verb, parse=parse) + + def lemma(self, verb, parse=True): + if parse: + verb, sattelites = self.decompose_particle_verb(verb) + lemmatized = _Verbs.lemma(self, verb, parse=parse) + if sattelites: + return ' '.join([lemmatized, sattelites]) + return lemmatized + else: + return _Verbs.lemma(self, verb, parse=parse) + def find_lemma(self, verb): """ Returns the base form of the given inflected verb, using a rule-based approach. This is problematic if a verb ending in -e is given in the past tense or gerund. """ + v = verb.lower() b = False if v in ("'m", "'re", "'s", "n't"): @@ -704,7 +731,12 @@ def find_lemma(self, verb): return v + "e" # decre => decree if v.endswith(("th", "ang", "un", "cr", "vr", "rs", "ps", "tr")): return v + "e" - return v + return verb + + def conjugate(self, verb, *args, **kwargs): + verb, sattelites = self.decompose_particle_verb(verb) + conjugated_verb = _Verbs.conjugate(self, verb, *args, **kwargs) + return ' '.join([conjugated_verb, sattelites]) def find_lexeme(self, verb): """ For a regular verb (base form), returns the forms using a rule-based approach. From 7435fede4b757f7a8e33d2bfa53484e6f798c8d1 Mon Sep 17 00:00:00 2001 From: "jakob.steixner" Date: Wed, 7 Oct 2020 12:42:28 +0200 Subject: [PATCH 2/3] chg: add daten to German faux_prefix_verbs (!=da+ten) --- pattern/text/de/inflect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pattern/text/de/inflect.py b/pattern/text/de/inflect.py index 7eca4ab0..6da5f6b2 100644 --- a/pattern/text/de/inflect.py +++ b/pattern/text/de/inflect.py @@ -493,7 +493,7 @@ def singularize(word, pos=NOUN, gender=MALE, role=SUBJECT, custom={}): 'zerren', 'zuenden', # zünden 'zuzeln', 'gellen', 'zuechten', # züchten 'ankern', 'angeln', 'herzigen', # 'be-herzigen after decomposition - 'dauern', 'darben', 'danken', # for whatever reason 'da' appears in prefix_separable + 'dauern', 'darben', 'danken', 'daten' # for whatever reason 'da' appears in prefix_separable ) # probably more faux_latinate = ( From 9a51869d6f688d34e6604c5dd877ad016da00e80 Mon Sep 17 00:00:00 2001 From: "jakob.steixner" Date: Wed, 7 Oct 2020 13:00:51 +0200 Subject: [PATCH 3/3] fix: fallback return values --- pattern/text/en/inflect.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pattern/text/en/inflect.py b/pattern/text/en/inflect.py index 23738fe9..d9ce5d13 100644 --- a/pattern/text/en/inflect.py +++ b/pattern/text/en/inflect.py @@ -731,12 +731,15 @@ def find_lemma(self, verb): return v + "e" # decre => decree if v.endswith(("th", "ang", "un", "cr", "vr", "rs", "ps", "tr")): return v + "e" - return verb + return v def conjugate(self, verb, *args, **kwargs): verb, sattelites = self.decompose_particle_verb(verb) conjugated_verb = _Verbs.conjugate(self, verb, *args, **kwargs) - return ' '.join([conjugated_verb, sattelites]) + if sattelites: + return ' '.join([conjugated_verb, sattelites]) + else: + return conjugated_verb def find_lexeme(self, verb): """ For a regular verb (base form), returns the forms using a rule-based approach.