-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
89473c4
commit 54dfbd1
Showing
1 changed file
with
148 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import queue | ||
import roboticslab_speech | ||
import sounddevice as sd | ||
import yarp | ||
|
||
from abc import ABC, abstractmethod | ||
from piper import PiperVoice # TODO: hide import | ||
from piper.download import find_voice # TODO: hide import | ||
|
||
class SynthesizerFactory(ABC):
    """Abstract factory interface for building synthesizer objects.

    Concrete factories keep whatever backend configuration they need and
    defer the actual construction until an audio stream is available.
    """

    @abstractmethod
    def create(self, stream):
        """Build a synthesizer bound to ``stream``.

        Args:
            stream: Audio stream object handed over to the new synthesizer.

        Returns:
            The synthesizer created by the concrete factory. This base
            implementation does nothing and returns ``None``.
        """
|
||
class PiperSynthesizerFactory(SynthesizerFactory):
    """Factory that builds :class:`PiperSynthesizer` instances.

    Stores the construction parameters so that the synthesizer can be
    created later, once the audio stream exists.
    """

    def __init__(self, device, model, use_cuda, rf):
        """Remember the parameters forwarded to ``PiperSynthesizer``.

        Args:
            device: Audio device identifier passed through unchanged.
            model: Model identifier passed through unchanged.
            use_cuda: Flag forwarded to the synthesizer's ``use_cuda``
                parameter.
            rf: Resource finder object passed through unchanged.
        """
        self.device = device
        self.model = model
        # Must be kept on the instance: create() runs later and has no
        # access to this constructor's local variables.
        self.use_cuda = use_cuda
        self.rf = rf

    def create(self, stream):
        """Return a new ``PiperSynthesizer`` bound to ``stream``."""
        return PiperSynthesizer(stream, self.device, self.model, self.use_cuda, self.rf)
|
||
class SpeechSynthesizer(roboticslab_speech.SpeechSynthesis):
    """Common base for synthesizer backends.

    Holds the audio stream and the default sample rate of the selected
    audio device.
    """

    def __init__(self, stream, device):
        """Store the stream and look up the device's default sample rate.

        Args:
            stream: Audio stream the synthesizer is bound to.
            device: Device identifier accepted by ``sd.query_devices``.

        Raises:
            ValueError: Propagated from ``sd.query_devices`` when the
                device cannot be used for output.
        """
        super().__init__()
        self.stream = stream
        # The synthesizer plays audio, so the device must be looked up as an
        # output device; asking for 'input' fails on playback-only hardware.
        device_info = sd.query_devices(device, 'output')
        # sounddevice reports the rate as a float; keep it as an int.
        self.sample_rate = int(device_info['default_samplerate'])
|
||
class PiperSynthesizer(SpeechSynthesizer):
    """Synthesizer backend based on Piper.

    Only construction is implemented so far; every service method below is
    a placeholder that does nothing and returns ``None``.
    """

    def __init__(self, stream, device, model, use_cuda, rf):
        """Initialise the base class and load the Piper voice.

        Args:
            stream: Audio stream forwarded to the base class.
            device: Audio device identifier forwarded to the base class.
            model: Model reference handed to ``PiperVoice.load``
                (presumably a path to the voice model file; TODO confirm).
            use_cuda: Forwarded to ``PiperVoice.load`` as ``use_cuda``.
            rf: Resource finder object, stored for later use.
        """
        super().__init__(stream, device)
        self.model = model
        self.rf = rf
        # PiperVoice is built through its load() helper; the class
        # constructor itself does not accept (model, config, use_cuda).
        self.voice = PiperVoice.load(self.model, None, use_cuda=use_cuda)

    def say(self, text):
        """Placeholder, not implemented yet."""
        # TODO: implement

    def play(self):
        """Placeholder, not implemented yet."""
        # TODO: implement

    def pause(self):
        """Placeholder, not implemented yet."""
        # TODO: implement

    def stop(self):
        """Placeholder, not implemented yet."""
        # TODO: implement

    def checkSayDone(self):
        """Placeholder, not implemented yet."""
        # TODO: implement

    def setLanguage(self, language):
        """Placeholder, not implemented yet."""
        # TODO: implement

    def setSpeed(self, speed):
        """Placeholder, not implemented yet."""
        # TODO: implement

    def setPitch(self, pitch):
        """Placeholder, not implemented yet."""
        # TODO: implement

    def getSpeed(self):
        """Placeholder, not implemented yet."""
        # TODO: implement

    def getPitch(self):
        """Placeholder, not implemented yet."""
        # TODO: implement

    def getSupportedLangs(self):
        """Placeholder, not implemented yet."""
        # TODO: implement
|
||
def int_or_str(text):
    """Helper function for argument parsing.

    Args:
        text: Raw command-line value.

    Returns:
        ``int(text)`` when the value parses as an integer, otherwise
        ``text`` unchanged.
    """
    try:
        return int(text)
    except ValueError:
        # Not a number: hand the original string back to the caller.
        return text
|
||
# Names of the TTS backends this script can instantiate.
BACKENDS = ['piper']

# First parsing stage: only the informational flags are recognised here, so
# they work without the options that the second stage marks as required.
# Help is disabled because this parser is reused as a parent later on.
parser = argparse.ArgumentParser(description='YARP service that transforms text into live audio output', add_help=False)
parser.add_argument('--list-devices', action='store_true', help='list available audio devices and exit')
parser.add_argument('--list-backends', action='store_true', help='list available TTS backends and exit')
args, remaining = parser.parse_known_args()

if args.list_devices:
    print(sd.query_devices())
    raise SystemExit
elif args.list_backends:
    print('\n'.join(BACKENDS))
    raise SystemExit
|
||
# Second parsing stage: the full option set, inheriting the informational
# flags from the first-stage parser so they still show up in --help.
parser = argparse.ArgumentParser(description=parser.description, formatter_class=argparse.ArgumentDefaultsHelpFormatter, parents=[parser])
parser.add_argument('--backend', '-b', type=str, required=True, help='TTS backend engine')
parser.add_argument('--device', '-d', type=int_or_str, help='output device (numeric ID or substring)')
parser.add_argument('--model', type=str, help='voice model (required by the Piper backend)')
parser.add_argument('--cuda', action='store_true', help='Use Onnx CUDA execution provider (requires onnxruntime-gpu)')
parser.add_argument('--prefix', '-p', type=str, default='/speechSynthesis', help='YARP port prefix')
parser.add_argument('--context', type=str, default='speechSynthesis', help='YARP context directory')
parser.add_argument('--from', type=str, dest='ini', default='speechSynthesis.ini', help='YARP configuration (.ini) file')
args = parser.parse_args(remaining)

yarp.Network.init()

rf = yarp.ResourceFinder()
rf.setDefaultContext(args.context)
rf.setDefaultConfigFile(args.ini)

if args.backend == 'piper':
    if args.model is None:
        print('Model must be specified for Piper')
        raise SystemExit(1)

    # Argument order matches PiperSynthesizerFactory(device, model, use_cuda, rf).
    synthesizer_factory = PiperSynthesizerFactory(args.device, args.model, args.cuda, rf)
else:
    print('Backend not available, must be one of: %s' % ', '.join(BACKENDS))
    raise SystemExit(1)

if not yarp.Network.checkNetwork():
    print('No YARP network available')
    raise SystemExit(1)

rpc = yarp.RpcServer()

if not rpc.open(args.prefix + '/rpc:s'):
    print('Unable to open RPC port')
    raise SystemExit(1)

try:
    q = queue.Queue()

    # NOTE(review): the callback only copies the output buffer into the queue
    # and never writes samples into `outdata`; presumably a placeholder until
    # synthesis is implemented - confirm before relying on it.
    with sd.RawOutputStream(blocksize=8000,
                            device=args.device,
                            dtype='int16',
                            channels=1,
                            callback=lambda outdata, frames, time, status: q.put(bytes(outdata))) as stream:
        synthesizer = synthesizer_factory.create(stream)
        synthesizer.yarp().attachAsServer(rpc)

        while True:
            frame = q.get()
            # TODO
except KeyboardInterrupt:
    # Ctrl+C is the regular way to stop the service.
    pass
finally:
    # Release the RPC port on every exit path, not only on Ctrl+C.
    rpc.interrupt()
    rpc.close()

parser.exit(0)