Skip to content

Commit

Permalink
Add phonemizer for Belarusian language (#2856)
Browse files Browse the repository at this point in the history
  • Loading branch information
alex73 authored Aug 28, 2023
1 parent b79b6f0 commit fead04f
Show file tree
Hide file tree
Showing 6 changed files with 125 additions and 1 deletion.
Empty file.
34 changes: 34 additions & 0 deletions TTS/tts/utils/text/belarusian/phonemizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os

# Module-level GrammarFinder shared by all phonemization calls.
# Stays None until init() has run successfully; init() assigns it via `global`.
finder = None


def init():
    """Start the JVM and build the shared ``GrammarFinder`` used for phonemization.

    The path to ``fanetyka.jar`` is read from the ``BEL_FANETYKA_JAR`` environment
    variable and placed on the JVM classpath. The resulting ``GrammarFinder`` is
    stored in the module-level ``finder`` variable.

    Raises:
        ModuleNotFoundError: If the ``jpype`` package cannot be imported.
        KeyError: If the ``BEL_FANETYKA_JAR`` environment variable is not set.
    """
    global finder

    try:
        import jpype
        import jpype.imports  # enables the `from org... import ...` statement below
    except ModuleNotFoundError as err:
        # Chain explicitly so the original import failure stays visible as the cause.
        raise ModuleNotFoundError(
            "Belarusian phonemizer requires to install module 'jpype1' manually. Try `pip install jpype1`."
        ) from err

    try:
        jar_path = os.environ["BEL_FANETYKA_JAR"]
    except KeyError as err:
        raise KeyError(
            "You need to define 'BEL_FANETYKA_JAR' environment variable as path to the fanetyka.jar file"
        ) from err

    # If an earlier init() attempt failed after the JVM came up, `finder` is still
    # None and init() runs again. Starting the JVM a second time would fail and hide
    # the real error, so only start it when it is not running yet.
    if not jpype.isJVMStarted():
        jpype.startJVM(classpath=[jar_path])

    # import the Java modules
    from org.alex73.korpus.base import GrammarDB2, GrammarFinder

    grammar_db = GrammarDB2.initializeFromJar()
    finder = GrammarFinder(grammar_db)


def belarusian_text_to_phonemes(text: str) -> str:
    """Convert Belarusian text to its IPA transcription.

    The Java backend is set up lazily: ``init()`` is called the first time this
    function runs, i.e. while the module-level ``finder`` is still ``None``.

    Args:
        text (str): Text to transcribe.

    Returns:
        str: The ``ipa`` value produced by ``FanetykaText`` for ``text``,
        converted to a Python string.
    """
    # One-time backend start-up.
    if finder is None:
        init()

    # Java classes can only be imported once the JVM is running, hence the local import.
    from org.alex73.fanetyka.impl import FanetykaText

    converter = FanetykaText(finder, text)
    return str(converter.ipa)
4 changes: 4 additions & 0 deletions TTS/tts/utils/text/phonemizers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer
from TTS.tts.utils.text.phonemizers.belarusian_phonemizer import BEL_Phonemizer
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak
from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut
Expand Down Expand Up @@ -35,6 +36,7 @@
# Register the name of the default phonemizer for each of these language codes.
DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name()
DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name()
DEF_LANG_TO_PHONEMIZER["bn"] = BN_Phonemizer.name()
DEF_LANG_TO_PHONEMIZER["be"] = BEL_Phonemizer.name()


# JA phonemizer has deal breaking dependencies like MeCab for some systems.
Expand Down Expand Up @@ -68,6 +70,8 @@ def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer:
return KO_KR_Phonemizer(**kwargs)
if name == "bn_phonemizer":
return BN_Phonemizer(**kwargs)
if name == "be_phonemizer":
return BEL_Phonemizer(**kwargs)
raise ValueError(f"Phonemizer {name} not found")


Expand Down
55 changes: 55 additions & 0 deletions TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from typing import Dict

from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
from TTS.tts.utils.text.belarusian.phonemizer import belarusian_text_to_phonemes

# Default punctuation characters passed to BEL_Phonemizer when the caller gives none.
_DEF_BE_PUNCS = ",!."  # TODO


class BEL_Phonemizer(BasePhonemizer):
    """🐸TTS be phonemizer using functions in `TTS.tts.utils.text.belarusian.phonemizer`

    Args:
        punctuations (str):
            Set of characters to be treated as punctuation. Defaults to `_DEF_BE_PUNCS`.

        keep_puncs (bool):
            If True, keep the punctuations after phonemization. Defaults to True.
    """

    language = "be"

    def __init__(self, punctuations=_DEF_BE_PUNCS, keep_puncs=True, **kwargs):  # pylint: disable=unused-argument
        # Extra keyword arguments are accepted but deliberately not forwarded to the base class.
        super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)

    @staticmethod
    def name() -> str:
        """Return the identifier under which this phonemizer is registered."""
        return "be_phonemizer"

    @staticmethod
    def phonemize_be(text: str, separator: str = "|") -> str:  # pylint: disable=unused-argument
        """Convert Belarusian ``text`` to phonemes.

        ``separator`` is accepted but not used; the text is passed to
        ``belarusian_text_to_phonemes`` unchanged.
        """
        return belarusian_text_to_phonemes(text)

    def _phonemize(self, text, separator) -> str:
        """Phonemize ``text`` by delegating to ``phonemize_be``."""
        return self.phonemize_be(text, separator)

    @staticmethod
    def supported_languages() -> Dict:
        """Return a mapping of supported language codes to language names."""
        return {"be": "Belarusian"}

    def version(self) -> str:
        """Return the version string of this phonemizer."""
        return "0.0.1"

    def is_available(self) -> bool:
        """Always report the phonemizer as available.

        No dependency check happens here.
        """
        return True


if __name__ == "__main__":
    # Manual smoke check: print the phonemizer's metadata, then phonemize a sample word.
    phonemizer = BEL_Phonemizer()
    sample_text = "тэст"

    print(phonemizer.supported_languages())
    print(phonemizer.version())
    print(phonemizer.language)
    print(phonemizer.name())
    print(phonemizer.is_available())

    # Backticks make leading or trailing whitespace in the output visible.
    phonemes = phonemizer.phonemize(sample_text)
    print("`" + phonemes + "`")
4 changes: 3 additions & 1 deletion recipes/bel-alex73/train_glowtts.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
output_path=output_path,
add_blank=True,
datasets=[dataset_config],
characters=characters,
# characters=characters,
enable_eos_bos_chars=True,
mixed_precision=False,
save_step=10000,
Expand All @@ -69,6 +69,8 @@
text_cleaner="no_cleaners",
audio=audio_config,
test_sentences=[],
use_phonemes=True,
phoneme_language="be",
)

if __name__ == "__main__":
Expand Down
29 changes: 29 additions & 0 deletions tests/text_tests/test_belarusian_phonemizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os
import warnings
import unittest

from TTS.tts.utils.text.belarusian.phonemizer import belarusian_text_to_phonemes

# One test case per line, formatted as "<input text>/<expected phonemes>".
# The test splits each line on "/" to separate the two parts.
_TEST_CASES = """
Фанетычны канвертар/fanʲɛˈtɨt͡ʂnɨ kanˈvʲɛrtar
Гэтак мы працавалі/ˈɣɛtak ˈmɨ prat͡saˈvalʲi
"""


class TestText(unittest.TestCase):
    """Checks Belarusian text-to-phoneme conversion against known transcriptions."""

    def test_belarusian_text_to_phonemes(self):
        """Compare the phonemizer output with the expected value of every test case.

        The test is skipped when the ``BEL_FANETYKA_JAR`` environment variable is
        not defined.
        """
        if "BEL_FANETYKA_JAR" not in os.environ:
            # Report a skip instead of returning silently, so the run does not
            # count this test as passed when nothing was actually checked.
            self.skipTest(
                "You need to define 'BEL_FANETYKA_JAR' environment variable as path to the fanetyka.jar file to test Belarusian phonemizer"
            )

        for line in _TEST_CASES.strip().split("\n"):
            text, phonemes = line.split("/")
            # subTest reports each case separately and keeps going after a failure.
            with self.subTest(text=text):
                self.assertEqual(belarusian_text_to_phonemes(text), phonemes)


# Run the tests with unittest's runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit fead04f

Please sign in to comment.