From fa0cbd78fea12467d7bb2a3a3407dc3530186600 Mon Sep 17 00:00:00 2001 From: Jake Tae Date: Mon, 28 Aug 2023 05:19:00 -0400 Subject: [PATCH] Update README with new device API (#2876) * docs: update readme w/ .to(device) api * docs: add .to(device) in python quickstart * docs: move section header out of comment * chore: use device instead of hard-coded string * docs: update inference.md --- README.md | 21 ++++++++++++--------- docs/source/inference.md | 12 ++++++------ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 9d82eca62a..6697a1920a 100644 --- a/README.md +++ b/README.md @@ -187,18 +187,21 @@ More details about the docker images (like GPU support) can be found [here](http ### 🐍 Python API +#### Running a multi-speaker and multi-lingual model + ```python +import torch from TTS.api import TTS -# Running a multi-speaker and multi-lingual model +# Get device +device = "cuda" if torch.cuda.is_available() else "cpu" # List available 🐸TTS models and choose the first one -model_name = TTS.list_models()[0] +model_name = TTS().list_models()[0] # Init TTS -tts = TTS(model_name) +tts = TTS(model_name).to(device) # Run TTS - # ❗ Since this model is multi-speaker and multi-lingual, we must set the target speaker and the language # Text to speech with a numpy output wav = tts.tts("This is a test! This is also a test!!", speaker=tts.speakers[0], language=tts.languages[0]) @@ -210,13 +213,13 @@ tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.langu ```python # Init TTS with the target model name -tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False, gpu=False) +tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False).to(device) + # Run TTS tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH) # Example voice cloning with YourTTS in English, French and Portuguese - -tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True) +tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to(device) tts.tts_to_file("This is voice cloning.", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav") tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wav", language="fr-fr", file_path="output.wav") tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt-br", file_path="output.wav") @@ -227,7 +230,7 @@ tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", Converting the voice in `source_wav` to the voice of `target_wav` ```python -tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False, gpu=True) +tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False).to("cuda") tts.voice_conversion_to_file(source_wav="my/source.wav", target_wav="my/target.wav", file_path="output.wav") ``` @@ -256,7 +259,7 @@ These models will follow the naming convention `coqui_studio/en//coqui_studio models = TTS().list_models() # Init TTS with the target studio speaker -tts = TTS(model_name="coqui_studio/en/Torcull Diarmuid/coqui_studio", progress_bar=False, gpu=False) +tts = TTS(model_name="coqui_studio/en/Torcull Diarmuid/coqui_studio", progress_bar=False) # Run TTS tts.tts_to_file(text="This is a test.", file_path=OUTPUT_PATH) # Run TTS with emotion and speed control @@ -222,7 +222,7 @@ You can find the list of language ISO codes [here](https://dl.fbaipublicfiles.co ```python from TTS.api import TTS -api = TTS(model_name="tts_models/eng/fairseq/vits", gpu=True) +api = TTS(model_name="tts_models/eng/fairseq/vits").to("cuda") api.tts_to_file("This is a test.", file_path="output.wav") # TTS with on the fly voice conversion