Skip to content

Commit

Permalink
feat: implement sending stop frames on pause/stop, TODO: make voice r…
Browse files Browse the repository at this point in the history
…eceive smooth while sending audio
  • Loading branch information
Neko-Life committed Oct 17, 2024
1 parent 515cb6a commit 445542e
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 17 deletions.
35 changes: 32 additions & 3 deletions include/dpp/discordvoiceclient.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,11 @@ class DPP_EXPORT discord_voice_client : public websocket_client
*/
void cleanup();

/**
* @brief A single 3-byte Opus packet used as one frame of silence.
*
* send_stop_frames() builds the stop frames it transmits from these bytes.
*/
static constexpr uint8_t silence_packet[3] = { 0xf8, 0xff, 0xfe };

/**
* @brief Mutex for outbound packet stream
*/
Expand Down Expand Up @@ -434,6 +439,13 @@ class DPP_EXPORT discord_voice_client : public websocket_client
*/
bool paused;

/**
* @brief Whether the 5 frames of silence have already been sent after a pause/stop.
*
* Sending them avoids unintended Opus interpolation with subsequent transmissions.
* The flag is set once the stop frames have been sent while paused, and is
* cleared again when audio is resumed.
*/
bool sent_stop_frames;

#ifdef HAVE_VOICE
/**
* @brief libopus encoder
Expand Down Expand Up @@ -650,8 +662,10 @@ class DPP_EXPORT discord_voice_client : public websocket_client
* @param packet packet data
* @param len length of packet
* @param duration duration of opus packet
* @param send_now send this packet right away without buffering.
* Do NOT set send_now to true outside write_ready.
*/
void send(const char* packet, size_t len, uint64_t duration);
void send(const char* packet, size_t len, uint64_t duration, bool send_now = false);

/**
* @brief Queue a message to be sent via the websocket
Expand Down Expand Up @@ -962,6 +976,10 @@ class DPP_EXPORT discord_voice_client : public websocket_client
* @param duration Generally duration is 2.5, 5, 10, 20, 40 or 60
* if the timescale is 1000000 (1ms)
*
* @param send_now Send this packet right away without buffering.
* This skips the duration calculation for the packet being sent
* and is only safe to set to true in write_ready.
*
* @return discord_voice_client& Reference to self
*
* @note It is your responsibility to ensure that packets of data
Expand All @@ -972,7 +990,7 @@ class DPP_EXPORT discord_voice_client : public websocket_client
*
* @throw dpp::voice_exception If data length is invalid or voice support not compiled into D++
*/
discord_voice_client& send_audio_opus(uint8_t* opus_packet, const size_t length, uint64_t duration);
discord_voice_client& send_audio_opus(const uint8_t* opus_packet, const size_t length, uint64_t duration, bool send_now = false);

/**
* @brief Send opus packets to the voice channel
Expand All @@ -999,7 +1017,7 @@ class DPP_EXPORT discord_voice_client : public websocket_client
*
* @throw dpp::voice_exception If data length is invalid or voice support not compiled into D++
*/
discord_voice_client& send_audio_opus(uint8_t* opus_packet, const size_t length);
discord_voice_client& send_audio_opus(const uint8_t* opus_packet, const size_t length);

/**
* @brief Send silence to the voice channel
Expand All @@ -1012,6 +1030,17 @@ class DPP_EXPORT discord_voice_client : public websocket_client
*/
discord_voice_client& send_silence(const uint64_t duration);

/**
* @brief Send stop frames (frames of silence) to the voice channel.
*
* These are sent when audio is stopped, and once when audio is paused.
*
* @param send_now Send the packet right away without buffering.
* Do NOT set send_now to true outside write_ready.
*
* @return discord_voice_client& Reference to self
* @throw dpp::voice_exception if voice support is not compiled into D++
*/
discord_voice_client& send_stop_frames(bool send_now = false);

/**
* @brief Sets the audio type that will be sent with send_audio_* methods.
*
Expand Down
18 changes: 17 additions & 1 deletion src/dpp/discordvoiceclient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
*
************************************************************************************/

#include <cstdint>
#ifdef _WIN32
#include <WinSock2.h>
#include <WS2tcpip.h>
Expand Down Expand Up @@ -148,6 +149,9 @@ bool discord_voice_client::is_end_to_end_encrypted() const {

/**
 * Pause or resume outbound audio.
 *
 * Resuming also clears sent_stop_frames, so that the stop frames are
 * sent again the next time audio is paused.
 *
 * @param pause True to pause, false to resume
 * @return discord_voice_client& Reference to self
 */
discord_voice_client& discord_voice_client::pause_audio(bool pause) {
	paused = pause;

	if (pause) {
		/* Pausing leaves sent_stop_frames as it is. */
		return *this;
	}

	sent_stop_frames = false;
	return *this;
}

Expand Down Expand Up @@ -176,6 +180,7 @@ discord_voice_client& discord_voice_client::stop_audio() {
outbuf.clear();
track_meta.clear();
tracks = 0;
this->send_stop_frames();
return *this;
}

Expand Down Expand Up @@ -398,7 +403,6 @@ discord_voice_client& discord_voice_client::skip_to_next_marker() {
}

/**
 * Send one packet of Opus silence to the voice channel.
 *
 * Uses the class-level silence_packet constant rather than a local copy
 * of the same bytes, so there is a single definition of the silence frame.
 *
 * @param duration Duration to pass along with the silence packet
 * @return discord_voice_client& Reference to self
 */
discord_voice_client& discord_voice_client::send_silence(const uint64_t duration) {
	send_audio_opus(silence_packet, sizeof(silence_packet), duration);
	return *this;
}
Expand All @@ -412,6 +416,7 @@ discord_voice_client& discord_voice_client::set_send_audio_type(send_audio_type_

discord_voice_client& discord_voice_client::speak() {
if (!this->sending) {
std::cout << "Sending voice_opcode_client_speaking\n";
this->queue_message(json({
{"op", voice_opcode_client_speaking},
{"d", {
Expand Down Expand Up @@ -443,4 +448,15 @@ uint16_t discord_voice_client::get_iteration_interval() {
return this->iteration_interval;
}

/**
 * Send five frames of silence so receivers do not interpolate across the
 * gap that follows a pause/stop.
 *
 * Each silence frame is sent as its own Opus packet. The first byte of
 * silence_packet (0xf8) is an Opus TOC byte that declares exactly one frame
 * in the packet, so concatenating five copies into one buffer would produce
 * a single packet with a 14-byte payload, not five frames of silence.
 *
 * @param send_now Forwarded to send_audio_opus(); when true each packet is
 * sent right away instead of being buffered. Only safe inside write_ready.
 * @return discord_voice_client& Reference to self
 */
discord_voice_client& discord_voice_client::send_stop_frames(bool send_now) {
	constexpr size_t stop_frame_count = 5;
	/* Same per-packet duration value the stop frames were sent with before. */
	constexpr uint64_t stop_frame_duration = 20;

	for (size_t i = 0; i < stop_frame_count; ++i) {
		this->send_audio_opus(silence_packet, sizeof(silence_packet), stop_frame_duration, send_now);
	}

	return *this;
}

} // namespace dpp
16 changes: 15 additions & 1 deletion src/dpp/voice/enabled/courier_loop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,21 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
break;
}

shared_state.signal_iteration.wait(lk);
shared_state.signal_iteration.wait(lk, [&shared_state](){
/*
* Only finish waiting once at least one user has parked payloads,
* instead of waking up every time read_ready was called.
*
* NOTE(review): this predicate looks only at parked payloads. If
* shutdown is also signalled through signal_iteration (the comment
* after this wait mentions "about to terminate"), the wait cannot
* return for it while every parking lot is empty. Confirm, and
* consider checking the termination state here as well.
*/
for (auto &[user_id, parking_lot]: shared_state.parked_voice_payloads) {
if (parking_lot.parked_payloads.empty()) {
continue;
}

return true;
}
return false;
});

/*
* More data came or about to terminate, or just a spurious wake.
* We need to collect the payloads again to determine what to do next.
Expand Down
8 changes: 4 additions & 4 deletions src/dpp/voice/enabled/opus.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ discord_voice_client& discord_voice_client::send_audio_raw(uint16_t* audio_data,
return *this;
}

/**
 * Send an Opus packet, deriving its duration from the packet itself.
 *
 * The sample count is read from the packet with opus_packet_get_nb_samples()
 * and converted to a duration using the client's timescale; the packet is
 * then handed to the four-argument overload exactly once, buffered
 * (send_now = false).
 *
 * @param opus_packet Opus packet data
 * @param length Length of the packet in bytes
 * @return discord_voice_client& Reference to self
 */
discord_voice_client& discord_voice_client::send_audio_opus(const uint8_t* opus_packet, const size_t length) {
	/*
	 * NOTE(review): opus_packet_get_nb_samples() reports failure as a negative
	 * value, which is not checked here before computing the duration.
	 */
	const int samples = opus_packet_get_nb_samples(opus_packet, static_cast<opus_int32>(length), opus_sample_rate_hz);
	const uint64_t duration = (samples / 48) / (timescale / 1000000);

	send_audio_opus(opus_packet, length, duration, false);
	return *this;
}

discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length, uint64_t duration) {
discord_voice_client& discord_voice_client::send_audio_opus(const uint8_t* opus_packet, const size_t length, uint64_t duration, bool send_now) {
int frame_size = (int)(48 * duration * (timescale / 1000000));
opus_int32 encoded_audio_max_length = (opus_int32)length;
std::vector<uint8_t> encoded_audio(encoded_audio_max_length);
Expand Down Expand Up @@ -147,7 +147,7 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet
/* Append the 4 byte nonce to the resulting payload */
std::memcpy(payload.data() + payload.size() - sizeof(noncel), &noncel, sizeof(noncel));

this->send(reinterpret_cast<const char *>(payload.data()), payload.size(), duration);
this->send(reinterpret_cast<const char *>(payload.data()), payload.size(), duration, send_now);

timestamp += frame_size;

Expand Down
17 changes: 10 additions & 7 deletions src/dpp/voice/enabled/read_write.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace dpp {

/**
 * Report whether the client wants to be notified of write readiness.
 *
 * Writing is requested while there is queued audio and the stop frames have
 * not been sent yet. Gating on sent_stop_frames rather than paused means a
 * freshly paused client still gets a write_ready pass, which is where the
 * stop frames are sent.
 *
 * @return dpp::socket The voice socket if a write is wanted, otherwise INVALID_SOCKET
 */
dpp::socket discord_voice_client::want_write() {
	std::lock_guard<std::mutex> lock(this->stream_mutex);

	const bool has_queued_audio = !outbuf.empty();
	if (has_queued_audio && !this->sent_stop_frames) {
		return fd;
	}
	return INVALID_SOCKET;
}
}


void discord_voice_client::send(const char* packet, size_t len, uint64_t duration) {
voice_out_packet frame;
frame.packet.assign(packet, packet + len);
frame.duration = duration;
{
void discord_voice_client::send(const char* packet, size_t len, uint64_t duration, bool send_now) {
if (!send_now) [[likely]] {
voice_out_packet frame;
frame.packet.assign(packet, packet + len);
frame.duration = duration;

std::lock_guard<std::mutex> lock(this->stream_mutex);
outbuf.emplace_back(frame);
} else [[unlikely]] {
this->udp_send(packet, len);
}
}

Expand All @@ -68,4 +71,4 @@ int discord_voice_client::udp_recv(char* data, size_t max_length)
return static_cast<int>(recv(this->fd, data, static_cast<int>(max_length), 0));
}

}
}
9 changes: 8 additions & 1 deletion src/dpp/voice/enabled/write_ready.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,14 @@ void discord_voice_client::write_ready() {
send_audio_type_t type = satype_recorded_audio;
{
std::lock_guard<std::mutex> lock(this->stream_mutex);
if (!this->paused && outbuf.size()) {
if (this->paused) {
if (!this->sent_stop_frames) {
this->send_stop_frames(true);
this->sent_stop_frames = true;
}

/* Fallthrough if paused */
} else if (outbuf.size()) {
type = send_audio_type;
if (outbuf[0].packet.size() == sizeof(uint16_t) && (*(reinterpret_cast<uint16_t*>(outbuf[0].packet.data()))) == AUDIO_TRACK_MARKER) {
outbuf.erase(outbuf.begin());
Expand Down

0 comments on commit 445542e

Please sign in to comment.