[WIP]

roboticslab-uc3m · Dec 3, 2023 · 54dfbd1 · 54dfbd1
1 parent 89473c4
commit 54dfbd1
Showing 1 changed file with 148 additions and 0 deletions.
diff --git a/programs/speechSynthesis/test.py b/programs/speechSynthesis/test.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+
+import argparse
+import queue
+import roboticslab_speech
+import sounddevice as sd
+import yarp
+
+from abc import ABC, abstractmethod
+from piper import PiperVoice # TODO: hide import
+from piper.download import find_voice # TODO: hide import
+
+class SynthesizerFactory(ABC):
+    """Abstract interface for objects that build speech synthesizers."""
+
+    @abstractmethod
+    def create(self, stream):
+        """Build a synthesizer for the given stream.
+
+        Subclasses must override this method; the base implementation
+        does nothing and returns None.
+        """
+        return None
+
+class PiperSynthesizerFactory(SynthesizerFactory):
+    """Factory that builds PiperSynthesizer instances.
+
+    The constructor arguments are stored so that the synthesizer can be
+    created later, once a stream is available.
+    """
+
+    def __init__(self, device, model, use_cuda, rf):
+        """Remember the settings that create() forwards to PiperSynthesizer.
+
+        Args:
+            device: forwarded as PiperSynthesizer's ``device`` argument.
+            model: forwarded as PiperSynthesizer's ``model`` argument.
+            use_cuda: forwarded as PiperSynthesizer's ``use_cuda`` argument.
+            rf: forwarded as PiperSynthesizer's ``rf`` argument.
+        """
+        self.device = device
+        self.model = model
+        # Must live on the instance: create() runs later, outside this scope.
+        self.use_cuda = use_cuda
+        self.rf = rf
+
+    def create(self, stream):
+        """Return a PiperSynthesizer built from stream and the stored settings."""
+        return PiperSynthesizer(stream, self.device, self.model, self.use_cuda, self.rf)
+
+class SpeechSynthesizer(roboticslab_speech.SpeechSynthesis):
+    """Common base for synthesizers: keeps the stream and the device sample rate."""
+
+    def __init__(self, stream, device):
+        """Store the stream and read the device's default sample rate.
+
+        Args:
+            stream: stream object kept as ``self.stream``.
+            device: device identifier passed to ``sd.query_devices``.
+        """
+        super().__init__()
+        self.stream = stream
+        # This service plays audio, so the device must be looked up as an
+        # output device; querying a playback-only device as 'input' fails.
+        device_info = sd.query_devices(device, 'output')
+        # soundfile expects an int, sounddevice provides a float:
+        self.sample_rate = int(device_info['default_samplerate'])
+
+class PiperSynthesizer(SpeechSynthesizer):
+    """Speech synthesizer backed by a Piper voice.
+
+    Only construction is implemented; every service method below is still a
+    placeholder that does nothing and returns None.
+    """
+
+    def __init__(self, stream, device, model, use_cuda, rf):
+        """Initialize the base class and load the Piper voice.
+
+        Args:
+            stream: forwarded to SpeechSynthesizer.
+            device: forwarded to SpeechSynthesizer.
+            model: voice model handed to Piper (presumably a path to an
+                .onnx file -- TODO confirm against the caller).
+            use_cuda: whether Piper should use the CUDA execution provider.
+            rf: stored as ``self.rf``; not used yet.
+        """
+        super().__init__(stream, device)
+        self.model = model
+        self.rf = rf
+        # PiperVoice.load() is the loader that accepts a model path, an
+        # optional config path and the use_cuda flag; the bare constructor
+        # does not take these arguments.
+        self.voice = PiperVoice.load(self.model, None, use_cuda=use_cuda)
+
+    def say(self, text):
+        """Placeholder, not implemented yet."""
+        pass
+
+    def play(self):
+        """Placeholder, not implemented yet."""
+        pass
+
+    def pause(self):
+        """Placeholder, not implemented yet."""
+        pass
+
+    def stop(self):
+        """Placeholder, not implemented yet."""
+        pass
+
+    def checkSayDone(self):
+        """Placeholder, not implemented yet."""
+        pass
+
+    def setLanguage(self, language):
+        """Placeholder, not implemented yet."""
+        pass
+
+    def setSpeed(self, speed):
+        """Placeholder, not implemented yet."""
+        pass
+
+    def setPitch(self, pitch):
+        """Placeholder, not implemented yet."""
+        pass
+
+    def getSpeed(self):
+        """Placeholder, not implemented yet."""
+        pass
+
+    def getPitch(self):
+        """Placeholder, not implemented yet."""
+        pass
+
+    def getSupportedLangs(self):
+        """Placeholder, not implemented yet."""
+        pass
+
+def int_or_str(text):
+    """Argument-parsing helper: return text as an int when it parses as one.
+
+    If ``int(text)`` raises ValueError, the original value is returned
+    unchanged.
+    """
+    try:
+        parsed = int(text)
+    except ValueError:
+        parsed = text
+    return parsed
+
+# Names of the TTS backends this script knows how to instantiate.
+BACKENDS = ['piper']
+
+# First pass: only the "list and exit" switches, so that they work without
+# the options that the second parser marks as required.
+parser = argparse.ArgumentParser(description='YARP service that transforms text into live audio output', add_help=False)
+parser.add_argument('--list-devices', action='store_true', help='list available audio devices and exit')
+parser.add_argument('--list-backends', action='store_true', help='list available TTS backends and exit')
+args, remaining = parser.parse_known_args()
+
+if args.list_devices:
+    print(sd.query_devices())
+    raise SystemExit
+elif args.list_backends:
+    print('\n'.join(BACKENDS))
+    raise SystemExit
+
+# Second pass: the full option set, inheriting the switches defined above.
+parser = argparse.ArgumentParser(description=parser.description, formatter_class=argparse.ArgumentDefaultsHelpFormatter, parents=[parser])
+parser.add_argument('--backend', '-b', type=str, required=True, help='TTS backend engine')
+parser.add_argument('--device', '-d', type=int_or_str, help='output device (numeric ID or substring)')
+parser.add_argument('--model', '-m', type=str, help='voice model for the selected backend (required by piper)')
+parser.add_argument('--cuda', action='store_true', help='Use Onnx CUDA execution provider (requires onnxruntime-gpu)')
+parser.add_argument('--prefix', '-p', type=str, default='/speechSynthesis', help='YARP port prefix')
+parser.add_argument('--context', type=str, default='speechSynthesis', help='YARP context directory')
+parser.add_argument('--from', type=str, dest='ini', default='speechSynthesis.ini', help='YARP configuration (.ini) file')
+args = parser.parse_args(remaining)
+
+yarp.Network.init()
+
+rf = yarp.ResourceFinder()
+rf.setDefaultContext(args.context)
+rf.setDefaultConfigFile(args.ini)
+
+if args.backend == 'piper':
+    # The factory takes (device, model, use_cuda, rf); only the model has no
+    # usable default, so it is the one option checked here.
+    if args.model is None:
+        print('Model must be specified for Piper')
+        raise SystemExit
+
+    synthesizer_factory = PiperSynthesizerFactory(args.device, args.model, args.cuda, rf)
+else:
+    print('Backend not available, must be one of: %s' % ', '.join(BACKENDS))
+    raise SystemExit
+
+if not yarp.Network.checkNetwork():
+    print('No YARP network available')
+    raise SystemExit
+
+rpc = yarp.RpcServer()
+
+if not rpc.open(args.prefix + '/rpc:s'):
+    print('Unable to open RPC port')
+    raise SystemExit
+
+try:
+    q = queue.Queue()
+
+    # NOTE(review): this callback only copies the output buffer into the
+    # queue and never writes audio into outdata -- confirm once the TODO
+    # in the loop below is implemented.
+    with sd.RawOutputStream(blocksize=8000,
+                            device=args.device,
+                            dtype='int16',
+                            channels=1,
+                            callback=lambda outdata, frames, time, status: q.put(bytes(outdata))) as stream:
+        synthesizer = synthesizer_factory.create(stream)
+        synthesizer.yarp().attachAsServer(rpc)
+
+        while True:
+            frame = q.get()
+            # TODO
+except KeyboardInterrupt:
+    # Ctrl+C is the normal way to stop the service.
+    pass
+finally:
+    # Release the RPC port on every exit path, not only on Ctrl+C.
+    rpc.interrupt()
+    rpc.close()
+
+parser.exit(0)