Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding IBM Watson STT/TTS #669

Open
wants to merge 1 commit into
base: jasper-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions plugins/stt/watson-stt/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# -*- coding: utf-8 -*-
from .watson import IBMWatsonSTTPlugin
10 changes: 10 additions & 0 deletions plugins/stt/watson-stt/plugin.info
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[Plugin]
Name = watson-stt
Version = 1.0.0
License = MIT
URL = http://jasperproject.github.io/
Description = Speech-To-Text implementation which relies on the IBM Watson Speech-To-Text API.

[Author]
Name = Jasper Project
URL = http://jasperproject.github.io/
134 changes: 134 additions & 0 deletions plugins/stt/watson-stt/watson.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import logging
import wave
import requests
from jasper import plugin


class IBMWatsonSTTPlugin(plugin.STTPlugin):
    """
    Speech-To-Text implementation which relies on the IBM Watson Speech To
    Text API.

    This implementation requires IBM Cloud Speech-To-Text credentials to be
    present in profile.yml.

    To obtain credentials:
    1. Sign up for a free IBM Cloud Account:
       https://console.bluemix.net/registration/
    2. Create a Speech-To-Text Instance through the IBM Cloud console:
       https://console.bluemix.net/catalog/services/speech-to-text
    3. Select your Speech-To-Text Instance and Click Service Credentials.
    4. Click New Credential. Name Credential "Jasper Credential" or
       appropriate.
    5. Add your credential username/password to your profile.yml. Add a
       'watson_stt' section and set your 'username' and 'password'.
       Optionally set 'model' to select the recognition model.
    6. Set the value of the 'stt_engine' key in your profile.yml to 'watson'
       NOTE(review): plugin.info names this plugin 'watson-stt'; confirm
       which of the two values the engine lookup actually expects.

    Excerpt from sample profile.yml:

        ...
        timezone: US/Pacific
        stt_engine: watson
        watson_stt:
            username: $YOUR_API_USERNAME
            password: $YOUR_API_PASSWORD
            model: en-US_BroadbandModel

    """

    def __init__(self, *args, **kwargs):
        """
        Set up the HTTP session and read the credentials from the profile.

        Raises KeyError if the profile has no 'watson_stt' section or the
        section lacks 'username' or 'password'.
        """
        plugin.STTPlugin.__init__(self, *args, **kwargs)

        self._logger = logging.getLogger(__name__)
        self._endpoint_url = ('https://stream.watsonplatform.net/'
                              'speech-to-text/api/v1/recognize')
        self._username = None
        self._password = None
        self._model = 'en-US_BroadbandModel'
        self._http = requests.Session()

        watson_config = self.profile['watson_stt']
        self.username = watson_config['username']
        self.password = watson_config['password']

        # 'model' is optional: keep the built-in default when the key is
        # absent or empty instead of failing like the mandatory credentials.
        try:
            configured_model = watson_config['model']
        except KeyError:
            configured_model = None
        if configured_model:
            self.model = configured_model

    @property
    def endpoint_url(self):
        """URL of the recognize endpoint (read-only)."""
        return self._endpoint_url

    @property
    def model(self):
        """Name of the recognition model sent with every request."""
        return self._model

    @model.setter
    def model(self, value):
        self._model = value

    @property
    def username(self):
        """Username used for HTTP basic authentication."""
        return self._username

    @username.setter
    def username(self, value):
        self._username = value

    @property
    def password(self):
        """Password used for HTTP basic authentication."""
        return self._password

    @password.setter
    def password(self, value):
        self._password = value

    def transcribe(self, fp):
        """
        Performs STT via the IBM Watson Speech-To-Text API, transcribing an
        audio file and returning the upper-cased transcripts.

        Arguments:
        fp -- a file object, opened for binary reading, containing WAV audio

        Returns:
        A tuple with one upper-cased transcript per result on success. An
        empty list if the credentials are missing, the request fails, or the
        response is empty or cannot be parsed.
        """

        if not self.username:
            self._logger.critical(
                'API username missing, transcription request aborted.')
            return []
        elif not self.password:
            self._logger.critical(
                'API password missing, transcription request aborted.')
            return []

        wav = wave.open(fp, 'rb')
        try:
            frame_rate = wav.getframerate()
        finally:
            wav.close()
        # wave.open() has already consumed the header from fp, so this read
        # continues from where it stopped -- presumably the raw PCM frames,
        # which is what the 'audio/l16' content type below announces.
        data = fp.read()

        auth = (self.username, self.password)
        params = {'inactivity_timeout': 30, 'model': self.model}
        headers = {'Content-Type': 'audio/l16; rate=%s' % frame_rate}
        try:
            r = self._http.post(
                self.endpoint_url,
                data=data,
                params=params,
                headers=headers,
                auth=auth
            )
        except requests.exceptions.RequestException:
            # Connection problems are reported the same way as HTTP errors
            # rather than being raised to the caller.
            self._logger.critical('Request failed.', exc_info=True)
            return []

        try:
            r.raise_for_status()
        except requests.exceptions.HTTPError:
            self._logger.critical('Request failed with http status %d',
                                  r.status_code)
            if r.status_code == requests.codes['forbidden']:
                self._logger.warning('Status 403 is probably caused by ' +
                                     'invalid IBM Cloud API credentials.')
            return []

        r.encoding = 'utf-8'
        try:
            response = r.json()
            if not response['results']:
                # Response result is empty
                raise ValueError('Nothing has been transcribed.')
            results = [result['alternatives'][0]['transcript']
                       for result in response['results']]
        except ValueError as e:
            # Raised above for an empty result list, and by r.json() when
            # the body is not valid JSON.
            self._logger.warning('Empty response: %s', e.args[0])
            results = []
        except (KeyError, IndexError):
            self._logger.warning('Cannot parse response.', exc_info=True)
            results = []
        else:
            # Convert all results to uppercase
            results = tuple(result.upper() for result in results)
            self._logger.info('Transcribed: %r', results)
        return results
2 changes: 2 additions & 0 deletions plugins/tts/watson-tts/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# -*- coding: utf-8 -*-
from .watson import IBMWatsonTTSPlugin
10 changes: 10 additions & 0 deletions plugins/tts/watson-tts/plugin.info
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[Plugin]
Name = watson-tts
Version = 1.0.0
License = MIT
URL = http://jasperproject.github.io/
Description = Text-To-Speech implementation which relies on the IBM Watson Text-To-Speech API.

[Author]
Name = Jasper Project
URL = http://jasperproject.github.io/
121 changes: 121 additions & 0 deletions plugins/tts/watson-tts/watson.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import json
import tempfile
import logging
import requests
from jasper import plugin


class IBMWatsonTTSPlugin(plugin.TTSPlugin):
    """
    Text-To-Speech implementation which relies on the IBM Watson Text To
    Speech API.

    This implementation requires IBM Cloud Text-To-Speech credentials to be
    present in profile.yml.

    To obtain credentials:
    1. Sign up for a free IBM Cloud Account:
       https://console.bluemix.net/registration/
    2. Create a Text-To-Speech Instance through the IBM Cloud console:
       https://console.bluemix.net/catalog/services/text-to-speech
    3. Select your Text-To-Speech Instance and Click Service Credentials.
    4. Click New Credential. Name Credential "Jasper Credential" or
       appropriate.
    5. Add your credential username/password to your profile.yml. Add a
       'watson_tts' section and set your 'username' and 'password'.
       Optionally set 'voice' to select the synthesis voice.
    6. Set the value of the 'tts_engine' key in your profile.yml to 'watson'
       NOTE(review): plugin.info names this plugin 'watson-tts'; confirm
       which of the two values the engine lookup actually expects.

    Excerpt from sample profile.yml:

        ...
        timezone: US/Pacific
        tts_engine: watson
        watson_tts:
            username: $YOUR_API_USERNAME
            password: $YOUR_API_PASSWORD

    """

    def __init__(self, *args, **kwargs):
        """
        Set up the HTTP session and read the credentials from the profile.

        Raises KeyError if the profile has no 'watson_tts' section or the
        section lacks 'username' or 'password'.
        """
        plugin.TTSPlugin.__init__(self, *args, **kwargs)

        self._logger = logging.getLogger(__name__)
        self._endpoint_url = ('https://stream.watsonplatform.net/'
                              'text-to-speech/api/v1/synthesize')
        self._username = None
        self._password = None
        self._voice = 'en-US_MichaelVoice'
        self._http = requests.Session()

        watson_config = self.profile['watson_tts']
        self.username = watson_config['username']
        self.password = watson_config['password']

        # 'voice' is optional: keep the built-in default when the key is
        # absent or empty instead of failing like the mandatory credentials.
        try:
            configured_voice = watson_config['voice']
        except KeyError:
            configured_voice = None
        if configured_voice:
            self.voice = configured_voice

    @property
    def endpoint_url(self):
        """URL of the synthesize endpoint (read-only)."""
        return self._endpoint_url

    @property
    def voice(self):
        """Name of the voice sent with every request."""
        return self._voice

    @voice.setter
    def voice(self, value):
        self._voice = value

    @property
    def username(self):
        """Username used for HTTP basic authentication."""
        return self._username

    @username.setter
    def username(self, value):
        self._username = value

    @property
    def password(self):
        """Password used for HTTP basic authentication."""
        return self._password

    @password.setter
    def password(self, value):
        self._password = value

    def say(self, phrase):
        """
        Performs TTS via the IBM Watson Text-To-Speech API, synthesizing
        voice audio and returning the audio data.

        Arguments:
        phrase -- the text to synthesize into audio

        Returns:
        The body of the API response as bytes (WAV audio is requested via
        the 'accept' header). An empty list if the credentials are missing
        or the request fails.
        """

        if not self.username:
            self._logger.critical(
                'API username missing, synthesize request aborted.')
            return []
        elif not self.password:
            self._logger.critical(
                'API password missing, synthesize request aborted.')
            return []

        auth = (self.username, self.password)
        params = {'voice': self.voice}
        payload = {'text': phrase}
        headers = {'content-type': 'application/json',
                   'accept': 'audio/wav;rate=48000'}
        try:
            r = self._http.post(
                self.endpoint_url,
                data=json.dumps(payload),
                params=params,
                headers=headers,
                auth=auth
            )
        except requests.exceptions.RequestException:
            # Connection problems are reported the same way as HTTP errors
            # rather than being raised to the caller.
            self._logger.critical('Request failed.', exc_info=True)
            return []

        try:
            r.raise_for_status()
        except requests.exceptions.HTTPError:
            self._logger.critical('Request failed with http status %d: %s',
                                  r.status_code, r.text)
            if r.status_code == requests.codes['forbidden']:
                self._logger.warning('Status 403 is probably caused by ' +
                                     'invalid IBM Cloud API credentials.')
            # Abort like the credential checks above; the body of an error
            # response is an error message, not audio.
            return []

        # The request is not streamed, so the whole body is already in
        # memory; no temporary file is needed to collect it.
        return r.content