Skip to content

Commit

Permalink
[WIP]
Browse files Browse the repository at this point in the history
  • Loading branch information
PeterBowman committed Dec 3, 2023
1 parent 89473c4 commit 54dfbd1
Showing 1 changed file with 148 additions and 0 deletions.
148 changes: 148 additions & 0 deletions programs/speechSynthesis/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
#!/usr/bin/env python3

import argparse
import queue
import roboticslab_speech
import sounddevice as sd
import yarp

from abc import ABC, abstractmethod
from piper import PiperVoice # TODO: hide import
from piper.download import find_voice # TODO: hide import

class SynthesizerFactory(ABC):
    """Abstract factory for building a synthesizer bound to an audio stream."""

    @abstractmethod
    def create(self, stream):
        """Build a synthesizer for *stream*; concrete factories must override this."""

class PiperSynthesizerFactory(SynthesizerFactory):
    """Factory that builds PiperSynthesizer instances from stored settings."""

    def __init__(self, device, model, use_cuda, rf):
        """Remember the settings forwarded to every synthesizer created later.

        device: audio device identifier handed to the synthesizer.
        model: voice model handed to the synthesizer.
        use_cuda: flag forwarded to the synthesizer's voice loader.
        rf: YARP ResourceFinder handed to the synthesizer.
        """
        self.device = device
        self.model = model
        # Previously dropped here, which made create() fail with NameError.
        self.use_cuda = use_cuda
        self.rf = rf

    def create(self, stream):
        """Return a new PiperSynthesizer attached to *stream*."""
        return PiperSynthesizer(stream, self.device, self.model, self.use_cuda, self.rf)

class SpeechSynthesizer(roboticslab_speech.SpeechSynthesis):
    """Base class for synthesizers: keeps the audio stream and the device sample rate."""

    def __init__(self, stream, device):
        """Store *stream* and look up the default sample rate of *device*.

        stream: audio stream object kept for later use by subclasses.
        device: device identifier passed to sounddevice's query_devices().
        """
        super().__init__()
        self.stream = stream
        # This program plays audio, so the device has to be looked up as an
        # output device; asking for 'input' rejects output-only devices.
        device_info = sd.query_devices(device, 'output')
        # soundfile expects an int, sounddevice provides a float:
        self.sample_rate = int(device_info['default_samplerate'])

class PiperSynthesizer(SpeechSynthesizer):
    """Speech synthesizer backed by a Piper voice.

    All service methods below are placeholders: they currently do nothing and
    return None.
    """

    def __init__(self, stream, device, model, use_cuda, rf):
        """Set up the base synthesizer and load the Piper voice.

        stream, device: forwarded to SpeechSynthesizer.
        model: voice model handed to the Piper loader.
        use_cuda: forwarded to the Piper loader as its use_cuda flag.
        rf: YARP ResourceFinder, stored but not used yet.
        """
        super().__init__(stream, device)
        self.model = model
        self.rf = rf
        # Voices are created through the PiperVoice.load() helper; the class
        # constructor itself does not accept a model path or a use_cuda flag.
        # NOTE(review): None is kept as the config path argument — confirm the
        # loader's fallback for a missing config path is what is wanted here.
        self.voice = PiperVoice.load(self.model, None, use_cuda=use_cuda)

    def say(self, text):
        """Not implemented yet."""
        pass

    def play(self):
        """Not implemented yet."""
        pass

    def pause(self):
        """Not implemented yet."""
        pass

    def stop(self):
        """Not implemented yet."""
        pass

    def checkSayDone(self):
        """Not implemented yet."""
        pass

    def setLanguage(self, language):
        """Not implemented yet."""
        pass

    def setSpeed(self, speed):
        """Not implemented yet."""
        pass

    def setPitch(self, pitch):
        """Not implemented yet."""
        pass

    def getSpeed(self):
        """Not implemented yet."""
        pass

    def getPitch(self):
        """Not implemented yet."""
        pass

    def getSupportedLangs(self):
        """Not implemented yet."""
        pass

def int_or_str(text):
    """Argument-parsing helper: give back an int when *text* parses as one.

    Any value that int() rejects with ValueError is returned unchanged.
    """
    try:
        parsed = int(text)
    except ValueError:
        parsed = text
    return parsed

# Names of the TTS backends this script knows how to instantiate.
BACKENDS = ['piper']

# First pass: only the informational flags, so that they work even when the
# otherwise required options are missing. Help is disabled here and provided
# by the second parser below.
parser = argparse.ArgumentParser(description='YARP service that transforms text into live audio output', add_help=False)
parser.add_argument('--list-devices', action='store_true', help='list available audio devices and exit')
parser.add_argument('--list-backends', action='store_true', help='list available TTS backends and exit')
args, remaining = parser.parse_known_args()

if args.list_devices:
    print(sd.query_devices())
    raise SystemExit
elif args.list_backends:
    print('\n'.join(BACKENDS))
    raise SystemExit

# Second pass: the full option set, inheriting the flags declared above.
parser = argparse.ArgumentParser(description=parser.description, formatter_class=argparse.ArgumentDefaultsHelpFormatter, parents=[parser])
parser.add_argument('--backend', '-b', type=str, required=True, help='TTS backend engine')
parser.add_argument('--device', '-d', type=int_or_str, help='output device (numeric ID or substring)')
# The piper backend needs a voice model; the factory takes it as its second argument.
parser.add_argument('--model', type=str, help='voice model (required by the piper backend)')
parser.add_argument('--cuda', action='store_true', help='Use Onnx CUDA execution provider (requires onnxruntime-gpu)')
parser.add_argument('--prefix', '-p', type=str, default='/speechSynthesis', help='YARP port prefix')
parser.add_argument('--context', type=str, default='speechSynthesis', help='YARP context directory')
parser.add_argument('--from', type=str, dest='ini', default='speechSynthesis.ini', help='YARP configuration (.ini) file')
args = parser.parse_args(remaining)

yarp.Network.init()

rf = yarp.ResourceFinder()
rf.setDefaultContext(args.context)
rf.setDefaultConfigFile(args.ini)

if args.backend == 'piper':
    if args.model is None:
        print('Model must be specified for Piper')
        raise SystemExit

    # Argument order follows PiperSynthesizerFactory(device, model, use_cuda, rf).
    synthesizer_factory = PiperSynthesizerFactory(args.device, args.model, args.cuda, rf)
else:
    print('Backend not available, must be one of: %s' % ', '.join(BACKENDS))
    raise SystemExit

# Nothing can be served without a reachable YARP network.
network_ok = yarp.Network.checkNetwork()

if not network_ok:
    print('No YARP network available')
    raise SystemExit

# RPC port through which the synthesizer service is exposed.
rpc = yarp.RpcServer()
rpc_port_name = args.prefix + '/rpc:s'

if not rpc.open(rpc_port_name):
    print('Unable to open RPC port')
    raise SystemExit

try:
    q = queue.Queue()

    def _enqueue_block(outdata, frames, time, status):
        # Stream callback: push a bytes copy of the buffer onto the queue.
        # NOTE(review): this reads the output buffer instead of filling it —
        # confirm this is intended once the TODO below is implemented.
        q.put(bytes(outdata))

    with sd.RawOutputStream(
        blocksize=8000,
        device=args.device,
        dtype='int16',
        channels=1,
        callback=_enqueue_block,
    ) as stream:
        synthesizer = synthesizer_factory.create(stream)
        synthesizer.yarp().attachAsServer(rpc)

        # Drain the queue until the user interrupts the program.
        while True:
            frame = q.get()
            # TODO
except KeyboardInterrupt:
    # Ctrl+C: shut the RPC port down and leave with a success status.
    rpc.interrupt()
    rpc.close()
    parser.exit(0)

0 comments on commit 54dfbd1

Please sign in to comment.