-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
issue #72 bug fixes in finite-state source code, readme updates
- Loading branch information
Showing
24 changed files
with
9,737 additions
and
346,661 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# Author: Leonel F. de Alencar, Federal University of Ceará | ||
# Date: April 16, 2018 | ||
# Author: Leonel F. de Alencar, [email protected], Federal University of Ceará | ||
# Date: April 27, 2018, bug corrections February 17, 2020 | ||
|
||
# Implementation of diminutive formation in Portuguese in the paradigm | ||
# of finite-state morphology (Beesley & Karttunen 2003) | ||
|
@@ -15,7 +15,7 @@ | |
# processes in Portuguese. The individual transducers are composed | ||
# into a single transducer encoding all alternation rules. | ||
|
||
# Defining a marker for words with stemms ending in s, | ||
# Defining a marker for words with stems ending in s, | ||
# e.g. "lápis", "burguês", etc. In these words, | ||
# the z of the -zinho suffix is deleted after a stem's s, | ||
# e.g. "lapisinho", "burguesinhos". In other cases, | ||
|
@@ -30,6 +30,31 @@ define StemmS %$; | |
# delete the StemmS marker ($) by rewriting it to the empty string | ||
define DelStemmS StemmS -> 0 ; | ||
|
||
# any string containing at least one hyphen; used as a right context ("_ Hyph") it restricts a rule to positions followed somewhere later by a hyphen, i.e. to a non-final hyphen-separated compound member | ||
define Hyph [$"-"] ; | ||
|
||
# protect accents in non-final hyphen-separated compound members from being removed by the Unaccent rule: | ||
# each accented vowel that is followed by a string containing a hyphen (Hyph) is rewritten to a placeholder symbol | ||
# (§ stands for the acute accent, ¢ for the circumflex); Reconv maps the placeholders back to accented vowels | ||
define Protect [ | ||
[á -> A§ || _ Hyph ] | ||
.o. [é -> E§ || _ Hyph ] | ||
.o. [ê -> E¢ || _ Hyph] | ||
.o. [ó -> O§ || _ Hyph] | ||
.o. [ô -> O¢ || _ Hyph] | ||
.o. [í -> I§ || _ Hyph] | ||
.o. [ú -> U§ || _ Hyph] | ||
.o. [â -> A¢ || _ Hyph] | ||
]; | ||
|
||
# convert the placeholder symbols introduced by Protect back into accented letters; | ||
# every rule maps placeholder -> accented letter (the inverse of the corresponding Protect rule), | ||
# so that the accents survive the Unaccent rule applied between Protect and Reconv in AltRules | ||
define Reconv [[ A§ -> á ] | ||
.o. [ E§ -> é ] | ||
.o. [ E¢ -> ê ] | ||
.o. [ O§ -> ó ] | ||
.o. [ O¢ -> ô ] | ||
.o. [ I§ -> í ] | ||
.o. [ U§ -> ú ] | ||
.o. [ A¢ -> â ]]; | ||
|
||
# front (anterior) vowels e and i, used as context in the alternation rules | ||
define AntVow [ e | i ] ; | ||
|
||
|
@@ -52,6 +77,15 @@ define PhonC [c -> %[ s %] || _ AntVow MorphSep ] ; | |
# convert the phone notation [ s ] back to the letter c (undoes PhonC) | ||
define OrthC %[ s %] -> c ; | ||
|
||
|
||
# convert letter g to phone [Z] (SAMPA code for the voiced | ||
# postalveolar fricative [ʒ] in IPA) to prevent rule ChangeG | ||
# from applying in cases like herege^inha (diminutive of herege); applies only when g is followed by a front vowel and the morpheme separator | ||
define PhonG [g -> %[ Z %] || _ AntVow MorphSep ] ; | ||
|
||
# convert the phone notation [ Z ] back to the letter g (undoes PhonG) | ||
define OrthG %[ Z %] -> g ; | ||
|
||
# remove the cedilla: rewrite ç as c when it is followed by the morpheme separator and a front vowel (e or i) | ||
define DeleteCedilla [ ç -> c || _ MorphSep AntVow ]; | ||
|
||
|
@@ -83,7 +117,10 @@ define OptDelEStemZ e (->) 0 || [z | s] _ s MorphSep z ; | |
# words with the stem ending in r, | ||
# e.g. flores^zinhas (diminutive of "flor" 'flower' in plural) | ||
# flores^zinhas => flors^zinhas | ||
define OptDelEStemR e (->) 0 || r _ s MorphSep z ; | ||
define OptDelEStemR e (->) 0 || Vow r _ s MorphSep z ; | ||
|
||
# TODO: abdômen => abdômenes => abdomenezinhos | ||
# => abdomenzinhos | ||
|
||
# compose both optional e-deletion rules (OptDelEStemZ, then OptDelEStemR) into one single FST | ||
define OptDelE OptDelEStemZ .o. OptDelEStemR ; | ||
|
@@ -113,24 +150,32 @@ define Unaccent [[á -> a] .o. [é -> e] .o. [ê -> e] .o. [ó -> o] | |
define AltRules NasalBilabAssim .o. | ||
PhonC | ||
.o. | ||
PhonG | ||
.o. | ||
ThemVowDel | ||
.o. | ||
ChangeC | ||
.o. | ||
OrthC | ||
.o. | ||
ChangeG | ||
ChangeG | ||
.o. | ||
OrthG | ||
.o. | ||
OptDelE | ||
.o. | ||
PluralSDeletion | ||
.o. | ||
SuffZDeletion | ||
.o. | ||
Protect | ||
.o. | ||
IDeletion | ||
.o. | ||
Unaccent | ||
.o. | ||
Reconv | ||
.o. | ||
DeleteCedilla | ||
.o. | ||
DelStemmS | ||
|
Oops, something went wrong.