Skip to content

Commit

Permalink
STTTGTS
Browse files Browse the repository at this point in the history
  • Loading branch information
Kayce Basques committed Feb 9, 2024
1 parent a7ece4e commit cdeb709
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 4 deletions.
Binary file added _static/jeff.mp4
Binary file not shown.
113 changes: 113 additions & 0 deletions blog/STTTGTS.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
.. _STTTGTS:

==================================
Speech To Text To Gemini To Speech
==================================

*2024 Feb 8*

.. raw:: html

<video controls style="width: 100%; aspect-ratio: 1 / 1;">
<source src="/_static/jeff.mp4" type="video/mp4"/>
</video>

.. code-block:: py
import os
import random
import subprocess
import threading
import time
import dotenv
import gpiozero
from google.cloud import speech
import google.generativeai as gemini
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/home/bookworm/service-account.json'
env = dotenv.dotenv_values('.env')
gemini.configure(api_key=env['GEMINI_API_KEY'])
model = gemini.GenerativeModel('gemini-pro')
# https://projects.raspberrypi.org/en/projects/physical-computing/5
button = gpiozero.Button(2)
audio_file = 'user.flac'
led = gpiozero.RGBLED(16, 20, 21)
# https://codelabs.developers.google.com/codelabs/cloud-speech-text-python3#3
def speech_to_text():
client = speech.SpeechClient()
# https://cloud.google.com/speech-to-text/docs/sync-recognize
with open(audio_file, 'rb') as f:
content = f.read()
config = speech.RecognitionConfig(
language_code='en',
encoding=speech.RecognitionConfig.AudioEncoding.FLAC,
sample_rate_hertz=48000,
audio_channel_count=2
)
audio = speech.RecognitionAudio(content=content)
response = client.recognize(config=config, audio=audio)
transcript = ''
for result in response.results:
transcript += result.alternatives[0].transcript
return transcript
# print(response)
# best_response = response.alternatives[0]
# print(best_response)
def record():
led.on()
led.color = (1, 0, 0)
# start the audio recording
process = subprocess.Popen(['rec', audio_file, 'rate', '48k'])
# https://unix.stackexchange.com/a/57593/79351
while not stop_recording.is_set():
# wait for it to end...
time.sleep(1)
process.terminate()
led.off()
def thinky_blinky():
"""blink like an octopus dreaming... https://youtu.be/0vKCLJZbytU"""
colors = [
(0, 0, 0),
(1, 0, 0),
(1, 1, 0),
(1, 0, 1),
(0, 1, 0),
(0, 1, 1),
(1, 1, 1)
]
led.on()
while not stop_blinking.is_set():
led.color = random.choice(colors)
time.sleep(0.1)
led.off()
while True:
button.wait_for_press()
stop_recording = threading.Event()
thread = threading.Thread(target=record)
thread.start()
button.wait_for_release()
stop_recording.set()
thread.join()
stop_blinking = threading.Event()
thread2 = threading.Thread(target=thinky_blinky)
thread2.start()
# time.sleep(1)
# subprocess.run(['play', '-v', '3.0', audio_file])
text = speech_to_text()
print(text)
response = model.generate_content(text)
print(response.text)
stop_blinking.set()
thread2.join()
led.on()
led.color = (0, 0.2, 1)
p = subprocess.Popen(['spd-say', '--wait', '--volume', '+100', f'"{response.text}"'])
while p.poll() is None:
time.sleep(1)
led.off()
1 change: 1 addition & 0 deletions blog/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ Read yourself a thing on this here weblog
littlebraincell
systems
systemantics
STTTGTS
4 changes: 0 additions & 4 deletions index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ Adventures in {hard,soft,wet}ware by `Kayce Basques <https://kayce.basqu.es>`_

Read yourself a thing on the :ref:`blog`

My one-and-only OKR for this site is to get progressively unhinged at a rate of
6% per annum --- so if you check back in 2033 you should hopefully see a 79%
increase in unhinged content over the 2023 baseline

.. toctree::
:maxdepth: 1
:hidden:
Expand Down

0 comments on commit cdeb709

Please sign in to comment.