From e5c208d2545f7d1248c93c3a8ef8ebdedc2c0672 Mon Sep 17 00:00:00 2001
From: Enno Hermann
Date: Fri, 14 Jun 2024 15:06:03 +0200
Subject: [PATCH 1/3] feat(cleaners): add multilingual phoneme cleaner

Unlike phoneme_cleaners, the new cleaner doesn't convert numbers into
English words.
---
 TTS/tts/utils/text/cleaners.py         | 19 ++++++++++++++++---
 tests/text_tests/test_text_cleaners.py |  7 ++++++-
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py
index 794a87c866..f829e4cc1c 100644
--- a/TTS/tts/utils/text/cleaners.py
+++ b/TTS/tts/utils/text/cleaners.py
@@ -3,6 +3,7 @@
 # TODO: pick the cleaner for languages dynamically
 
 import re
+from typing import Optional
 
 from anyascii import anyascii
 
@@ -44,8 +45,8 @@ def remove_aux_symbols(text):
     return text
 
 
-def replace_symbols(text, lang="en"):
-    """Replace symbols based on the lenguage tag.
+def replace_symbols(text, lang: Optional[str] = "en"):
+    """Replace symbols based on the language tag.
 
     Args:
       text:
@@ -122,7 +123,11 @@ def english_cleaners(text):
     return text
 
 
 def phoneme_cleaners(text):
-    """Pipeline for phonemes mode, including number and abbreviation expansion."""
+    """Pipeline for phonemes mode, including number and abbreviation expansion.
+
+    NB: This cleaner converts numbers into English words, for other languages
+    use multilingual_phoneme_cleaners().
+    """
     text = en_normalize_numbers(text)
     text = expand_abbreviations(text)
     text = replace_symbols(text)
@@ -131,6 +136,14 @@ def phoneme_cleaners(text):
     return text
 
 
+def multilingual_phoneme_cleaners(text):
+    """Pipeline for phonemes mode, without English-specific number and abbreviation expansion."""
+    text = replace_symbols(text, lang=None)
+    text = remove_aux_symbols(text)
+    text = collapse_whitespace(text)
+    return text
+
+
 def french_cleaners(text):
     """Pipeline for French text. There is no need to expand numbers, phonemizer already does that"""
     text = expand_abbreviations(text, lang="fr")
diff --git a/tests/text_tests/test_text_cleaners.py b/tests/text_tests/test_text_cleaners.py
index fcfa71e77d..bf0c8d5d8a 100644
--- a/tests/text_tests/test_text_cleaners.py
+++ b/tests/text_tests/test_text_cleaners.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-from TTS.tts.utils.text.cleaners import english_cleaners, phoneme_cleaners
+from TTS.tts.utils.text.cleaners import english_cleaners, multilingual_phoneme_cleaners, phoneme_cleaners
 
 
 def test_time() -> None:
@@ -19,3 +19,8 @@ def test_currency() -> None:
 def test_expand_numbers() -> None:
     assert phoneme_cleaners("-1") == "minus one"
     assert phoneme_cleaners("1") == "one"
+
+
+def test_multilingual_phoneme_cleaners() -> None:
+    assert multilingual_phoneme_cleaners("(Hello)") == "Hello"
+    assert multilingual_phoneme_cleaners("1:") == "1,"

From a1495d4bc102e425e948efaf8c7427d47973b607 Mon Sep 17 00:00:00 2001
From: Enno Hermann
Date: Fri, 14 Jun 2024 15:09:01 +0200
Subject: [PATCH 2/3] fix(recipes): use multilingual phoneme cleaner in non-english recipes

---
 recipes/thorsten_DE/align_tts/train_aligntts.py          | 2 +-
 recipes/thorsten_DE/glow_tts/train_glowtts.py            | 2 +-
 recipes/thorsten_DE/speedy_speech/train_speedy_speech.py | 2 +-
 recipes/thorsten_DE/tacotron2-DDC/train_tacotron_ddc.py  | 2 +-
 recipes/thorsten_DE/vits_tts/train_vits.py               | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/recipes/thorsten_DE/align_tts/train_aligntts.py b/recipes/thorsten_DE/align_tts/train_aligntts.py
index 32cfd9967f..42363940f3 100644
--- a/recipes/thorsten_DE/align_tts/train_aligntts.py
+++ b/recipes/thorsten_DE/align_tts/train_aligntts.py
@@ -30,7 +30,7 @@
     run_eval=True,
     test_delay_epochs=-1,
     epochs=1000,
-    text_cleaner="phoneme_cleaners",
+    text_cleaner="multilingual_phoneme_cleaners",
    use_phonemes=False,
     phoneme_language="de",
     phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
diff --git a/recipes/thorsten_DE/glow_tts/train_glowtts.py b/recipes/thorsten_DE/glow_tts/train_glowtts.py
index 00c67fb5d8..f7f4a186a2 100644
--- a/recipes/thorsten_DE/glow_tts/train_glowtts.py
+++ b/recipes/thorsten_DE/glow_tts/train_glowtts.py
@@ -40,7 +40,7 @@
     run_eval=True,
     test_delay_epochs=-1,
     epochs=1000,
-    text_cleaner="phoneme_cleaners",
+    text_cleaner="multilingual_phoneme_cleaners",
     use_phonemes=True,
     phoneme_language="de",
     phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
diff --git a/recipes/thorsten_DE/speedy_speech/train_speedy_speech.py b/recipes/thorsten_DE/speedy_speech/train_speedy_speech.py
index a3d0b9db2b..024dcaa31e 100644
--- a/recipes/thorsten_DE/speedy_speech/train_speedy_speech.py
+++ b/recipes/thorsten_DE/speedy_speech/train_speedy_speech.py
@@ -45,7 +45,7 @@
     test_delay_epochs=-1,
     epochs=1000,
     min_audio_len=11050,  # need to up min_audio_len to avois speedy speech error
-    text_cleaner="phoneme_cleaners",
+    text_cleaner="multilingual_phoneme_cleaners",
     use_phonemes=True,
     phoneme_language="de",
     phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
diff --git a/recipes/thorsten_DE/tacotron2-DDC/train_tacotron_ddc.py b/recipes/thorsten_DE/tacotron2-DDC/train_tacotron_ddc.py
index bc0274f5af..a46e27e91b 100644
--- a/recipes/thorsten_DE/tacotron2-DDC/train_tacotron_ddc.py
+++ b/recipes/thorsten_DE/tacotron2-DDC/train_tacotron_ddc.py
@@ -49,7 +49,7 @@
     gradual_training=[[0, 6, 64], [10000, 4, 32], [50000, 3, 32], [100000, 2, 32]],
     double_decoder_consistency=True,
     epochs=1000,
-    text_cleaner="phoneme_cleaners",
+    text_cleaner="multilingual_phoneme_cleaners",
     use_phonemes=True,
     phoneme_language="de",
     phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
diff --git a/recipes/thorsten_DE/vits_tts/train_vits.py b/recipes/thorsten_DE/vits_tts/train_vits.py
index 4ffa0f30f6..4b773c3508 100644
--- a/recipes/thorsten_DE/vits_tts/train_vits.py
+++ b/recipes/thorsten_DE/vits_tts/train_vits.py
@@ -40,7 +40,7 @@
     run_eval=True,
     test_delay_epochs=-1,
     epochs=1000,
-    text_cleaner="phoneme_cleaners",
+    text_cleaner="multilingual_phoneme_cleaners",
     use_phonemes=True,
     phoneme_language="de",
     phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),

From 9cfcc0a0f5ced388a3d2c473e64ebf810b0e53dc Mon Sep 17 00:00:00 2001
From: Enno Hermann
Date: Fri, 14 Jun 2024 15:20:04 +0200
Subject: [PATCH 3/3] chore(cleaners): add type hints

Also raise a ValueError for unsupported languages in
expand_abbreviations().
---
 TTS/tts/utils/text/cleaners.py | 37 ++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py
index f829e4cc1c..fc87025f00 100644
--- a/TTS/tts/utils/text/cleaners.py
+++ b/TTS/tts/utils/text/cleaners.py
@@ -18,34 +18,37 @@
 _whitespace_re = re.compile(r"\s+")
 
 
-def expand_abbreviations(text, lang="en"):
+def expand_abbreviations(text: str, lang: str = "en") -> str:
     if lang == "en":
         _abbreviations = abbreviations_en
     elif lang == "fr":
         _abbreviations = abbreviations_fr
+    else:
+        msg = f"Language {lang} not supported in expand_abbreviations"
+        raise ValueError(msg)
     for regex, replacement in _abbreviations:
         text = re.sub(regex, replacement, text)
     return text
 
 
-def lowercase(text):
+def lowercase(text: str) -> str:
     return text.lower()
 
 
-def collapse_whitespace(text):
+def collapse_whitespace(text: str) -> str:
     return re.sub(_whitespace_re, " ", text).strip()
 
 
-def convert_to_ascii(text):
+def convert_to_ascii(text: str) -> str:
     return anyascii(text)
 
 
-def remove_aux_symbols(text):
+def remove_aux_symbols(text: str) -> str:
     text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text)
     return text
 
 
-def replace_symbols(text, lang: Optional[str] = "en"):
+def replace_symbols(text: str, lang: Optional[str] = "en") -> str:
     """Replace symbols based on the language tag.
 
     Args:
@@ -78,14 +81,14 @@ def replace_symbols(text: str, lang: Optional[str] = "en") -> str:
     return text
 
 
-def basic_cleaners(text):
+def basic_cleaners(text: str) -> str:
     """Basic pipeline that lowercases and collapses whitespace without transliteration."""
     text = lowercase(text)
     text = collapse_whitespace(text)
     return text
 
 
-def transliteration_cleaners(text):
+def transliteration_cleaners(text: str) -> str:
     """Pipeline for non-English text that transliterates to ASCII."""
     # text = convert_to_ascii(text)
     text = lowercase(text)
@@ -93,7 +96,7 @@ def transliteration_cleaners(text):
     return text
 
 
-def basic_german_cleaners(text):
+def basic_german_cleaners(text: str) -> str:
     """Pipeline for German text"""
     text = lowercase(text)
     text = collapse_whitespace(text)
@@ -101,7 +104,7 @@ def basic_german_cleaners(text):
 
 
 # TODO: elaborate it
-def basic_turkish_cleaners(text):
+def basic_turkish_cleaners(text: str) -> str:
     """Pipeline for Turkish text"""
     text = text.replace("I", "ı")
     text = lowercase(text)
@@ -109,7 +112,7 @@ def basic_turkish_cleaners(text):
     return text
 
 
-def english_cleaners(text):
+def english_cleaners(text: str) -> str:
     """Pipeline for English text, including number and abbreviation expansion."""
     # text = convert_to_ascii(text)
     text = lowercase(text)
@@ -122,7 +125,7 @@ def english_cleaners(text):
     return text
 
 
-def phoneme_cleaners(text):
+def phoneme_cleaners(text: str) -> str:
     """Pipeline for phonemes mode, including number and abbreviation expansion.
 
     NB: This cleaner converts numbers into English words, for other languages
@@ -136,7 +139,7 @@ def phoneme_cleaners(text):
     return text
 
 
-def multilingual_phoneme_cleaners(text):
+def multilingual_phoneme_cleaners(text: str) -> str:
     """Pipeline for phonemes mode, without English-specific number and abbreviation expansion."""
     text = replace_symbols(text, lang=None)
     text = remove_aux_symbols(text)
@@ -144,7 +147,7 @@ def multilingual_phoneme_cleaners(text):
     return text
 
 
-def french_cleaners(text):
+def french_cleaners(text: str) -> str:
     """Pipeline for French text. There is no need to expand numbers, phonemizer already does that"""
     text = expand_abbreviations(text, lang="fr")
     text = lowercase(text)
@@ -154,7 +157,7 @@ def french_cleaners(text):
     return text
 
 
-def portuguese_cleaners(text):
+def portuguese_cleaners(text: str) -> str:
     """Basic pipeline for Portuguese text. There is no need to expand abbreviation and
     numbers, phonemizer already does that"""
     text = lowercase(text)
@@ -170,7 +173,7 @@ def chinese_mandarin_cleaners(text: str) -> str:
     return text
 
 
-def multilingual_cleaners(text):
+def multilingual_cleaners(text: str) -> str:
     """Pipeline for multilingual text"""
     text = lowercase(text)
     text = replace_symbols(text, lang=None)
@@ -179,7 +182,7 @@ def multilingual_cleaners(text):
     return text
 
 
-def no_cleaners(text):
+def no_cleaners(text: str) -> str:
     # remove newline characters
     text = text.replace("\n", "")
     return text
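
A quick way to sanity-check the behaviour once the three patches are applied is to replay the assertions from the new test, with one existing English assertion for contrast (a minimal sketch; it only assumes the patched TTS package is importable):

    from TTS.tts.utils.text.cleaners import multilingual_phoneme_cleaners, phoneme_cleaners

    # Language-independent cleaning: parentheses are stripped by remove_aux_symbols()
    # and ":" becomes "," in replace_symbols(), but digits are left as-is.
    assert multilingual_phoneme_cleaners("(Hello)") == "Hello"
    assert multilingual_phoneme_cleaners("1:") == "1,"

    # The English pipeline still spells numbers out.
    assert phoneme_cleaners("1") == "one"

In a recipe config the new cleaner is selected by name, i.e. text_cleaner="multilingual_phoneme_cleaners", as in the thorsten_DE recipes patched above.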