Skip to content

Commit

Permalink
Merge pull request #40 from inworld-ai/yjRegen2
Browse files Browse the repository at this point in the history
1. INTG-1401 regenerate utterance. 2. Put Sentis lipsync in separate demo
  • Loading branch information
Juniverse authored Apr 11, 2024
2 parents f49e890 + ac08786 commit c1172eb
Show file tree
Hide file tree
Showing 20 changed files with 250 additions and 127 deletions.
1 change: 1 addition & 0 deletions Runtime/Resources/InworldAI.asset
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ MonoBehaviour:
emotions: 1
interruptions: 1
narratedActions: 1
regenerateResponse: 1
text: 1
triggers: 1
phonemeInfo: 1
Expand Down
80 changes: 52 additions & 28 deletions Runtime/Scripts/AudioCapture.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ namespace Inworld
public class AudioCapture : MonoBehaviour
{
[SerializeField] protected MicSampleMode m_SamplingMode = MicSampleMode.NO_FILTER;
[Tooltip("Hold the key to sample, release the key to send audio")]
[SerializeField] protected KeyCode m_PushToTalkKey = KeyCode.None;
[Range(1, 2)][SerializeField] protected float m_PlayerVolumeThreshold = 2f;
[SerializeField] protected int m_BufferSeconds = 1;
[SerializeField] protected int m_AudioToPushCapacity = 100;
[SerializeField] protected string m_DeviceName;


public UnityEvent OnRecordingStart;
public UnityEvent OnRecordingEnd;
Expand All @@ -41,14 +43,13 @@ public class AudioCapture : MonoBehaviour

#region Variables
protected float m_CharacterVolume = 1f;
protected MicSampleMode m_LastSampleMode;
protected MicSampleMode m_InitSampleMode;
protected const int k_SizeofInt16 = sizeof(short);
protected const int k_SampleRate = 16000;
protected const int k_Channel = 1;
protected AudioClip m_Recording;
protected IEnumerator m_AudioCoroutine;
protected bool m_IsPlayerSpeaking;
protected bool m_IsCapturing;
protected float m_BackgroundNoise;
// Last known position in AudioClip buffer.
protected int m_LastPosition;
Expand Down Expand Up @@ -86,7 +87,22 @@ public float Volume
/// <summary>
/// Signifies if microphone is capturing audio.
/// </summary>
public bool IsCapturing => m_IsCapturing;
// Capturing state is derived from the sampling mode: every mode except NO_MIC counts as capturing.
public bool IsCapturing
{
get => m_SamplingMode != MicSampleMode.NO_MIC;
set
{
if (value)
{
// Only leave NO_MIC; a mode that is already active is kept unchanged.
// m_InitSampleMode is the mode recorded during Init().
if (m_SamplingMode == MicSampleMode.NO_MIC)
m_SamplingMode = m_InitSampleMode;
}
else
{
// Turning capture off is expressed by switching the sampling mode to NO_MIC.
m_SamplingMode = MicSampleMode.NO_MIC;
}
}
}
/// <summary>
/// Signifies if audio should be pushed to server automatically as it is captured.
/// </summary>
Expand All @@ -98,12 +114,10 @@ public bool AutoPush
if (value)
{
if (m_SamplingMode == MicSampleMode.PUSH_TO_TALK)
m_SamplingMode = m_LastSampleMode;
m_SamplingMode = m_InitSampleMode;
}
else
{
if (m_SamplingMode != MicSampleMode.PUSH_TO_TALK)
m_LastSampleMode = m_SamplingMode;
m_SamplingMode = MicSampleMode.PUSH_TO_TALK;
}
}
Expand All @@ -122,8 +136,8 @@ public MicSampleMode SampleMode
/// </summary>
public bool IsPlayerTurn =>
m_SamplingMode == MicSampleMode.NO_FILTER ||
m_SamplingMode == MicSampleMode.PUSH_TO_TALK ||
m_SamplingMode == MicSampleMode.TURN_BASED && !InworldController.CharacterHandler.IsAnyCharacterSpeaking;
m_SamplingMode == MicSampleMode.PUSH_TO_TALK ||
m_SamplingMode== MicSampleMode.TURN_BASED && !InworldController.CharacterHandler.IsAnyCharacterSpeaking;

/// <summary>
/// A flag to check if audio is available to send to server.
Expand Down Expand Up @@ -226,39 +240,44 @@ public void ChangeInputDevice(string deviceName)
/// </summary>
public void StartRecording()
{
if (m_IsCapturing)
if (IsCapturing)
return;
#if UNITY_WEBGL && !UNITY_EDITOR
m_LastPosition = WebGLGetPosition();
#else
m_LastPosition = Microphone.GetPosition(m_DeviceName);
#endif
m_IsCapturing = true;
IsCapturing = true;
OnRecordingStart?.Invoke();
}
/// <summary>
/// Stops recording with Unity's official microphone module and triggers the OnRecordingEnd event.
/// </summary>
public void StopRecording()
{
if (!m_IsCapturing)
if (!IsCapturing)
return;
m_AudioToPush.Clear();
m_IsCapturing = false;
IsCapturing = false;
OnRecordingEnd?.Invoke();
}
/// <summary>
/// Manually push the audio wave data to server.
/// </summary>
public void PushAudio()
// Coroutine: waits one second, then sends every entry queued in m_AudioToPush
// through InworldController.Instance.SendAudio and empties the queue.
// NOTE(review): the one-second wait presumably lets audio captured around the key release reach the queue — confirm.
public IEnumerator PushAudio()
{
yield return new WaitForSeconds(1);
foreach (string audioData in m_AudioToPush)
{
InworldController.Instance.SendAudio(audioData);
}
m_AudioToPush.Clear();
}
public virtual void StopAudio() => m_CurrentAudioSession.StopAudio();
// Discards any audio still queued in m_AudioToPush, then stops the current audio session.
public virtual void StopAudio()
{
m_AudioToPush.Clear();
m_CurrentAudioSession.StopAudio();
}
public virtual void StartAudio(List<string> characters = null)
{
if (characters == null || characters.Count == 0)
Expand Down Expand Up @@ -336,16 +355,32 @@ protected virtual void OnDestroy()
StopRecording();
StopMicrophone(m_DeviceName);
}
// Unity per-frame callback: runs the push-to-talk handling, then trims the pending audio queue.
protected void Update()
{
HandlePTT();
// Drops the oldest entry when the queue exceeds m_AudioToPushCapacity.
// At most one entry is removed per call.
if (m_AudioToPush.Count > m_AudioToPushCapacity)
m_AudioToPush.RemoveAt(0);
}

#endregion

#region Protected Functions

// Push-to-talk handling driven by m_PushToTalkKey; called from Update().
protected virtual void HandlePTT()
{
// AutoPush is on while the key is not held and off while it is held.
AutoPush = !Input.GetKey(m_PushToTalkKey);
// Key pressed: start from an empty queue.
if (Input.GetKeyDown(m_PushToTalkKey))
m_AudioToPush.Clear();
// Key released: start the PushAudio coroutine to send what was queued.
if (Input.GetKeyUp(m_PushToTalkKey))
StartCoroutine(PushAudio());
}
protected virtual void Init()
{
m_CurrentAudioSession = new AudioSessionInfo();
m_BufferSize = m_BufferSeconds * k_SampleRate;
m_ByteBuffer = new byte[m_BufferSize * k_Channel * k_SizeofInt16];
m_InputBuffer = new float[m_BufferSize * k_Channel];
m_LastSampleMode = m_SamplingMode;
m_InitSampleMode = m_SamplingMode;
#if UNITY_WEBGL && !UNITY_EDITOR
s_WebGLBuffer = new float[m_BufferSize * k_Channel];
WebGLInit(OnWebGLInitialized);
Expand All @@ -368,20 +403,11 @@ protected virtual void OnPacketSent(InworldPacket packet)
}
protected virtual void OnCharacterJoined(InworldCharacter character)
{
if (!InworldController.CharacterHandler.CurrentCharacter) // Group Chat Mode
{
//m_CurrentAudioSession.StopAudio();
m_CurrentAudioSession.StartAudio(InworldController.CharacterHandler.CurrentCharacterNames);
}
character.Event.onCharacterSelected.AddListener(OnCharacterSelected);
character.Event.onCharacterDeselected.AddListener(OnCharacterDeselected);
}
protected virtual void OnCharacterLeft(InworldCharacter character)
{
if (!InworldController.CharacterHandler.CurrentCharacter) // Group Chat Mode
{
m_CurrentAudioSession.StartAudio(InworldController.CharacterHandler.CurrentCharacterNames);
}
character.Event.onCharacterSelected.RemoveListener(OnCharacterSelected);
character.Event.onCharacterDeselected.RemoveListener(OnCharacterDeselected);
}
Expand All @@ -394,7 +420,7 @@ protected virtual IEnumerator AudioCoroutine()
while (true)
{
yield return _Calibrate();
if (!m_IsCapturing || IsBlocked)
if (!IsCapturing || IsBlocked)
{
yield return null;
continue;
Expand All @@ -406,10 +432,8 @@ protected virtual IEnumerator Collect()
{
if (m_SamplingMode == MicSampleMode.NO_MIC)
yield break;

if (m_SamplingMode != MicSampleMode.PUSH_TO_TALK && m_BackgroundNoise == 0)
yield break;

int nSize = GetAudioData();
if (nSize <= 0)
yield break;
Expand Down
20 changes: 1 addition & 19 deletions Runtime/Scripts/CharacterHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,25 +71,7 @@ public InworldCharacter CurrentCharacter
/// If set, it'll also start audio sampling if `ManualAudioHandling` is false, and invoke the event OnCharacterChanged
/// </summary>
public List<InworldCharacter> CurrentCharacters => m_CharacterList;

/// <summary>
/// If it's false, AudioCapture of the InworldController will automatically start recording player's voice when at least a character is selected.
/// Otherwise, developers need to manually call `InworldController.Instance.StartAudio()` to start microphone.
/// </summary>
public bool ManualAudioHandling
{
get => m_ManualAudioHandling;
set
{
if (m_ManualAudioHandling == value)
return;
m_ManualAudioHandling = value;
if (m_ManualAudioHandling)
InworldController.Instance.StopAudio();
else
InworldController.Instance.StartAudio();
}
}

/// <summary>
/// Get the current Character Selecting Method. By default it's manual.
/// </summary>
Expand Down
5 changes: 5 additions & 0 deletions Runtime/Scripts/Data/Entities/Capabilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ public class Capabilities
public bool emotions;
public bool interruptions;
public bool narratedActions;
public bool regenerateResponse;
public bool text;
public bool triggers;
public bool phonemeInfo;
Expand All @@ -31,6 +32,7 @@ public Capabilities(Capabilities rhs)
emotions = rhs.emotions;
interruptions = rhs.interruptions;
narratedActions = rhs.narratedActions;
regenerateResponse = rhs.regenerateResponse;
text = rhs.text;
triggers = rhs.triggers;
phonemeInfo = rhs.phonemeInfo;
Expand All @@ -44,6 +46,7 @@ public void CopyFrom(Capabilities rhs)
emotions = rhs.emotions;
interruptions = rhs.interruptions;
narratedActions = rhs.narratedActions;
regenerateResponse = rhs.regenerateResponse;
text = rhs.text;
triggers = rhs.triggers;
phonemeInfo = rhs.phonemeInfo;
Expand Down Expand Up @@ -73,6 +76,8 @@ public override string ToString()
result += "INTERRUPTIONS ";
if (narratedActions)
result += "NARRATIVE ";
if (regenerateResponse)
result += "REGENERATERESPONSE ";
if (text)
result += "TEXT ";
if (triggers)
Expand Down
2 changes: 1 addition & 1 deletion Runtime/Scripts/Data/Entities/InworldSceneData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ public class LoadSceneResponse
public List<string> UpdateRegisteredCharacter(ref List<InworldCharacterData> outData)
{
List<string> result = new List<string>();
foreach (var charData in outData)
foreach (InworldCharacterData charData in outData)
{
string registeredID = agents.FirstOrDefault(a => a.brainName == charData.brainName)?.agentId;
if (string.IsNullOrEmpty(registeredID))
Expand Down
41 changes: 41 additions & 0 deletions Runtime/Scripts/Data/Packets/ApplyResponsePacket.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*************************************************************************************************
* Copyright 2022-2024 Theai, Inc. dba Inworld AI
*
* Use of this source code is governed by the Inworld.ai Software Development Kit License Agreement
* that can be found in the LICENSE.md file or at https://www.inworld.ai/sdk-license
*************************************************************************************************/
using System;
using System.Collections.Generic;
using UnityEngine;

namespace Inworld.Packet
{
/// <summary>
/// Serializable payload holding a single <see cref="PacketId"/> in <c>packetId</c>.
/// NOTE(review): presumably identifies the response to apply — confirm against the server API.
/// </summary>
[Serializable]
public class ApplyResponse
{
public PacketId packetId;
}
/// <summary>
/// Serializable wrapper exposing an <see cref="ApplyResponse"/> under the field name <c>applyResponse</c>.
/// It is the type of the <c>mutation</c> field of <see cref="ApplyResponsePacket"/>.
/// </summary>
[Serializable]
public class ApplyResponseEvent
{
public ApplyResponse applyResponse;
}

/// <summary>
/// An <see cref="InworldPacket"/> whose <c>type</c> is "MUTATION" and whose payload is an <see cref="ApplyResponseEvent"/>.
/// </summary>
[Serializable]
public class ApplyResponsePacket : InworldPacket
{
// Payload, stored under the field name "mutation".
public ApplyResponseEvent mutation;

/// <summary>
/// Creates a packet with type "MUTATION" and a new, empty <see cref="ApplyResponseEvent"/>.
/// </summary>
public ApplyResponsePacket()
{
type = "MUTATION";
mutation = new ApplyResponseEvent();
}
/// <summary>
/// Creates a packet from an existing packet and event.
/// </summary>
/// <param name="rhs">Packet passed to the base constructor.</param>
/// <param name="evt">Event stored as this packet's <c>mutation</c> payload.</param>
public ApplyResponsePacket(InworldPacket rhs, ApplyResponseEvent evt) : base(rhs)
{
type = "MUTATION";
mutation = evt;
}
/// <summary>
/// JSON form of this packet, produced by <c>JsonUtility.ToJson</c>.
/// </summary>
public override string ToJson => JsonUtility.ToJson(this);
}
}
3 changes: 3 additions & 0 deletions Runtime/Scripts/Data/Packets/ApplyResponsePacket.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,21 @@ public class CancelResponse
public List<string> utteranceId;
}
[Serializable]
public class MutationEvent
public class CancelResponseEvent
{
public CancelResponse cancelResponses;
}
[Serializable]
public class MutationPacket : InworldPacket
public class CancelResponsePacket : InworldPacket
{
public MutationEvent mutation;
public CancelResponseEvent mutation;

public MutationPacket()
public CancelResponsePacket()
{
type = "MUTATION";
mutation = new MutationEvent();
mutation = new CancelResponseEvent();
}
public MutationPacket(InworldPacket rhs, MutationEvent evt) : base(rhs)
public CancelResponsePacket(InworldPacket rhs, CancelResponseEvent evt) : base(rhs)
{
type = "MUTATION";
mutation = evt;
Expand Down
4 changes: 2 additions & 2 deletions Runtime/Scripts/Data/Packets/NetworkPacket.cs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public class InworldNetworkPacket : InworldPacket
public DataChunk dataChunk;
public GestureEvent gesture;
public CustomEvent custom;
public MutationEvent mutation;
public CancelResponseEvent mutation;
public EmotionEvent emotion;
public ActionEvent action;
public SessionResponseEvent sessionControlResponse;
Expand All @@ -79,7 +79,7 @@ public InworldPacket Packet
if (custom != null && !string.IsNullOrEmpty(custom.name))
return new CustomPacket(this, custom);
if (mutation != null && !string.IsNullOrEmpty(mutation.cancelResponses?.interactionId))
return new MutationPacket(this, mutation);
return new CancelResponsePacket(this, mutation);
if (emotion != null && !string.IsNullOrEmpty(emotion.behavior))
return new EmotionPacket(this, emotion);
if (action != null && action.narratedAction != null && !string.IsNullOrEmpty(action.narratedAction.content))
Expand Down
Loading

0 comments on commit c1172eb

Please sign in to comment.