-
Notifications
You must be signed in to change notification settings - Fork 1
/
google_TTS_srv_eng.py
executable file
·143 lines (116 loc) · 4.9 KB
/
google_TTS_srv_eng.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/python3
import io
import logging
import os
import sys
import time
import wave

import rosgraph
import rospkg
import rospy
from google.cloud import texttospeech
from pydub import AudioSegment
from pydub.playback import play
from roboy_cognition_msgs.msg import SpeechSynthesis
from roboy_cognition_msgs.srv import Talk, TalkResponse, TalkToFile, TalkToFileResponse
# from rclpy.node import Node
# With a limit of 0 Python prints only the exception type and message,
# without any traceback frames.
sys.tracebacklimit = 0

# Abort at import time when no ROS master can be reached.
_ros_master_online = rosgraph.is_master_online()
if not _ros_master_online:
    raise Exception("ROS master is not online")

# NOTE(review): the Google client presumably needs credentials exported in
# the environment before this script starts - confirm for the deployment:
# export GOOGLE_APPLICATION_CREDENTIALS=""
class GoogleTTS():
    """ROS node that speaks English text via Google Cloud Text-to-Speech.

    The node offers two services:

    * ``/roboy/cognition/speech/synthesis/talk`` (``Talk``): synthesize the
      requested text and play it.
    * ``/roboy/cognition/speech/synthesis/save/english`` (``TalkToFile``):
      synthesize the requested text, store it below ``self.path`` and play it.

    While talking, ``SpeechSynthesis`` messages are published on
    ``/roboy/cognition/speech/synthesis``.
    """

    def __init__(self):
        """Register the node, its publisher/services and the TTS client.

        Finishes by synthesizing and playing "I am ready!".
        """
        rospy.init_node('google_tts_en')

        # Saved audio goes into the "generated" folder of the soncreo package.
        rospack = rospkg.RosPack()
        self.path = rospack.get_path('soncreo')+'/generated/'

        self.publisher = rospy.Publisher('/roboy/cognition/speech/synthesis', SpeechSynthesis)
        self.srv = rospy.Service('/roboy/cognition/speech/synthesis/talk', Talk, self.talk_callback)
        self.save_srv = rospy.Service('/roboy/cognition/speech/synthesis/save/english', TalkToFile, self.save_callback)

        self.client = texttospeech.TextToSpeechClient()
        self.voice = texttospeech.types.VoiceSelectionParams(
            language_code='en-US',
            ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)
        self.audio_config = texttospeech.types.AudioConfig(
            # effects_profile_id=["large-home-entertainment-class-device"],
            # pitch=-1.0,
            audio_encoding=texttospeech.enums.AudioEncoding.MP3)

        # Container format (and file extension) used when saving to disk.
        self.FORMAT = "wav"

        rospy.loginfo("Ready to /roboy/cognition/speech/synthesis/talk")
        self.synthesize("I am ready!")

    def talk_callback(self, request):
        """Handle a ``Talk`` request: speak ``request.text``.

        A ``SpeechSynthesis`` message with phoneme ``'o'`` and duration 5 is
        published before speaking, and one with phoneme ``'sil'`` and
        duration 0 afterwards.

        Returns:
            A ``TalkResponse`` with ``success`` set to ``True``.
        """
        response = TalkResponse()
        response.success = True
        rospy.loginfo('Incoming Text: %s' % (request.text))

        msg = SpeechSynthesis()
        msg.duration = 5
        msg.phoneme = 'o'
        self.publisher.publish(msg)

        self.synthesize(request.text)

        # Signal that speaking has finished.
        msg.phoneme = 'sil'
        msg.duration = 0
        self.publisher.publish(msg)
        return response

    def save_callback(self, request):
        """Handle a ``TalkToFile`` request: speak and optionally save the text.

        An empty ``request.text`` is rejected. An empty ``request.filename``
        means the audio is played but not written to disk.

        Returns:
            A ``TalkToFileResponse`` whose ``success`` is ``False`` for empty
            text and ``True`` otherwise.
        """
        response = TalkToFileResponse()
        if request.text == "":
            response.success = False
            return response

        rospy.loginfo('Incoming Text: %s' % (request.text))
        if request.filename != "":
            rospy.loginfo('Saving to filename: %s' % (request.filename))
        response.success = True
        self.synthesize(request.text, request.filename)
        return response

    @staticmethod
    def play_audio(fname):
        """Play the wave file ``fname`` through PyAudio in 1024-frame chunks.

        Args:
            fname: Path of a wave file readable by the ``wave`` module.

        Raises:
            ImportError: If the optional ``pyaudio`` package is not installed.
        """
        # pyaudio is only needed by this helper, so it is imported lazily
        # instead of becoming a hard dependency of the whole node.
        import pyaudio

        chunk = 1024
        # The context manager closes the wave file even if playback fails.
        with wave.open(fname, 'rb') as wf:
            p = pyaudio.PyAudio()
            try:
                # NOTE(review): input_device_index is passed although the
                # stream is output-only; output_device_index was possibly
                # intended - confirm before changing.
                stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                                channels=wf.getnchannels(),
                                rate=wf.getframerate(),
                                input_device_index=1,
                                output=True)
                try:
                    # Writing to the stream is what actually plays the sound.
                    data = wf.readframes(chunk)
                    while len(data) > 0:
                        stream.write(data)
                        data = wf.readframes(chunk)
                finally:
                    stream.stop_stream()
                    stream.close()
            finally:
                p.terminate()
        print("Output wave generated")

    @staticmethod
    def _prepare_output_path(base_dir, filename, audio_format):
        """Build the target file path and make sure its directory exists.

        Args:
            base_dir: Directory prefix, expected to end with a path separator.
            filename: File name without extension; may contain sub-folders
                separated by "/".
            audio_format: Extension appended after a ".".

        Returns:
            ``base_dir + filename + "." + audio_format``.
        """
        target = base_dir + filename + "." + audio_format
        parent = os.path.dirname(target)
        if parent:
            # Creates every missing level, including base_dir itself.
            os.makedirs(parent, exist_ok=True)
        return target

    def synthesize(self, text, filename=None):
        """Synthesize ``text`` with Google TTS, optionally save it, and play it.

        Args:
            text: The text to speak.
            filename: Optional name (without extension) relative to
                ``self.path``. When given and non-empty, the audio is exported
                there in ``self.FORMAT`` before playback.
        """
        synthesis_input = texttospeech.types.SynthesisInput(text=text)
        rospy.loginfo("sending request..")
        response = self.client.synthesize_speech(synthesis_input, self.voice, self.audio_config)

        # The service answers with MP3 bytes; decode them in memory.
        song = AudioSegment.from_file(io.BytesIO(response.audio_content), format="mp3")

        if filename:
            target = self._prepare_output_path(self.path, filename, self.FORMAT)
            song.export(target, format=self.FORMAT)
            rospy.loginfo("Saved to %s"%(target))

        play(song)
def main(args=None):
    """Create the GoogleTTS node and hand control to ``rospy.spin()``.

    ``args`` is accepted but not used.
    """
    tts_node = GoogleTTS()
    rospy.spin()
# Start the node only when this file is executed as a script.
if __name__ == "__main__":
    main()