Skip to content

Commit

Permalink
Added importable tts.py
Browse files Browse the repository at this point in the history
  • Loading branch information
lxe committed Nov 27, 2023
1 parent 17c6b61 commit 3e93bf9
Show file tree
Hide file tree
Showing 39 changed files with 308 additions and 42 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.egg-info
build
10 changes: 5 additions & 5 deletions Demo/Inference_LJSpeech.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@
"import librosa\n",
"from nltk.tokenize import word_tokenize\n",
"\n",
"from models import *\n",
"from utils import *\n",
"from text_utils import TextCleaner\n",
"from styletts2.models import *\n",
"from styletts2.utils import *\n",
"from styletts2.text_utils import TextCleaner\n",
"textclenaer = TextCleaner()\n",
"\n",
"%matplotlib inline"
Expand Down Expand Up @@ -160,7 +160,7 @@
"pitch_extractor = load_F0_models(F0_path)\n",
"\n",
"# load BERT model\n",
"from Utils.PLBERT.util import load_plbert\n",
"from styletts2.Utils.PLBERT.util import load_plbert\n",
"BERT_path = config.get('PLBERT_dir', False)\n",
"plbert = load_plbert(BERT_path)"
]
Expand Down Expand Up @@ -221,7 +221,7 @@
"metadata": {},
"outputs": [],
"source": [
"from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule"
"from styletts2.Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule"
]
},
{
Expand Down
16 changes: 8 additions & 8 deletions Demo/Inference_LibriTTS.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@
"import librosa\n",
"from nltk.tokenize import word_tokenize\n",
"\n",
"from models import *\n",
"from utils import *\n",
"from text_utils import TextCleaner\n",
"from styletts2.models import *\n",
"from styletts2.utils import *\n",
"from styletts2.text_utils import TextCleaner\n",
"textclenaer = TextCleaner()\n",
"\n",
"%matplotlib inline"
Expand Down Expand Up @@ -160,7 +160,7 @@
"pitch_extractor = load_F0_models(F0_path)\n",
"\n",
"# load BERT model\n",
"from Utils.PLBERT.util import load_plbert\n",
"from styletts2.Utils.PLBERT.util import load_plbert\n",
"BERT_path = config.get('PLBERT_dir', False)\n",
"plbert = load_plbert(BERT_path)"
]
Expand Down Expand Up @@ -222,7 +222,7 @@
"metadata": {},
"outputs": [],
"source": [
"from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule"
"from styletts2.Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule"
]
},
{
Expand Down Expand Up @@ -1133,9 +1133,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "NLP",
"display_name": "Python 3",
"language": "python",
"name": "nlp"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -1147,7 +1147,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.11.5"
}
},
"nbformat": 4,
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
recursive-include styletts2 *
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,40 @@ Please make sure you have the LibriTTS checkpoint downloaded and unzipped under
- **Out of memory after `joint_epoch`**: This is likely because your GPU RAM is not big enough for the SLM adversarial training run. You may skip it, but the quality could be worse. Setting `joint_epoch` to a larger number than `epochs` skips the SLM adversarial training.

## Inference

Quick start example:

```python
# Quick-start: load a pretrained model, derive a style from a reference
# recording, synthesize one utterance and play it back.
from styletts2 import TTS
import sounddevice as sd
import phonemizer

# Load the model from a config file and a checkpoint, both given here as
# hf:// URIs.
tts = TTS.load_model(
config_path="hf://yl4579/StyleTTS2-LibriTTS/Models/LibriTTS/config.yml",
checkpoint_path="hf://yl4579/StyleTTS2-LibriTTS/Models/LibriTTS/epochs_2nd_00020.pth"
)

# espeak-backed phonemizer for US English that keeps punctuation and
# stress marks in its output.
es_phonemizer = phonemizer.backend.EspeakBackend(
language='en-us',
preserve_punctuation=True,
with_stress=True
)

# NOTE(review): this reference-audio path points outside this repository;
# replace it with the path to your own reference .wav file.
style = tts.compute_style('../tts-server/tts_server/voices/en-f-1.wav')

# Synthesize the text using the style computed above. The second returned
# value is not used in this example.
wav, _ = tts.inference(
"This is a text! Hello world! How are you? What's your name?",
style,
phonemizer=es_phonemizer,
alpha=0.3,
beta=0.7,
diffusion_steps=10,
embedding_scale=2)

# Play the waveform at a 24000 Hz sample rate, then wait for playback to
# finish before the script continues.
sd.play(wav, 24000)
sd.wait()
```

Please refer to [Inference_LJSpeech.ipynb](https://github.com/yl4579/StyleTTS2/blob/main/Demo/Inference_LJSpeech.ipynb) (single-speaker) and [Inference_LibriTTS.ipynb](https://github.com/yl4579/StyleTTS2/blob/main/Demo/Inference_LibriTTS.ipynb) (multi-speaker) for details. For LibriTTS, you will also need to download [reference_audio.zip](https://huggingface.co/yl4579/StyleTTS2-LibriTTS/resolve/main/reference_audio.zip) and unzip it under the `Demo` directory before running the demo.

- The pretrained StyleTTS 2 on LJSpeech corpus in 24 kHz can be downloaded at [https://huggingface.co/yl4579/StyleTTS2-LJSpeech/tree/main](https://huggingface.co/yl4579/StyleTTS2-LJSpeech/tree/main).
Expand Down
22 changes: 22 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from setuptools import setup, find_packages

# Distributions that must be installed alongside the package. The last
# entry is a direct reference that is fetched from a Git repository.
INSTALL_REQUIRES = [
    "cached_path",
    "nltk",
    "scipy",
    "numpy",
    "munch",
    "librosa",
    "sounddevice",
    "einops",
    "einops_exts",
    "transformers",
    "matplotlib",
    "monotonic_align @ git+https://github.com/resemble-ai/monotonic_align.git",
]

# Package metadata for the `styletts2` distribution. Packages are
# discovered automatically, and include_package_data asks setuptools to
# ship the data files matched by the project's packaging manifest.
setup(
    name="styletts2",
    version="0.0.1",
    packages=find_packages(),
    include_package_data=True,
    install_requires=INSTALL_REQUIRES,
)
Loading

0 comments on commit 3e93bf9

Please sign in to comment.