From 445542eb92d5d46439e612d3db0789ba5cbf97b0 Mon Sep 17 00:00:00 2001 From: Neko-Life Date: Fri, 18 Oct 2024 00:20:58 +0700 Subject: [PATCH] feat: implement sending stop frames on pause/stop, TODO: make voice receive smooth while sending audio --- include/dpp/discordvoiceclient.h | 35 +++++++++++++++++++++++--- src/dpp/discordvoiceclient.cpp | 18 ++++++++++++- src/dpp/voice/enabled/courier_loop.cpp | 16 +++++++++++- src/dpp/voice/enabled/opus.cpp | 8 +++--- src/dpp/voice/enabled/read_write.cpp | 17 +++++++------ src/dpp/voice/enabled/write_ready.cpp | 9 ++++++- 6 files changed, 86 insertions(+), 17 deletions(-) diff --git a/include/dpp/discordvoiceclient.h b/include/dpp/discordvoiceclient.h index 658ba6422a..3a5f457938 100644 --- a/include/dpp/discordvoiceclient.h +++ b/include/dpp/discordvoiceclient.h @@ -240,6 +240,11 @@ class DPP_EXPORT discord_voice_client : public websocket_client */ void cleanup(); + /** + * @brief A packet containing a single frame of silence + */ + static constexpr uint8_t silence_packet[3] = { 0xf8, 0xff, 0xfe }; + /** * @brief Mutex for outbound packet stream */ @@ -434,6 +439,13 @@ class DPP_EXPORT discord_voice_client : public websocket_client */ bool paused; + /** + * @brief Whether 5 frames of silence have been sent before stopping on pause/stop. + * + * This is to avoid unintended Opus interpolation with subsequent transmissions. + */ + bool sent_stop_frames; + #ifdef HAVE_VOICE /** * @brief libopus encoder @@ -650,8 +662,10 @@ class DPP_EXPORT discord_voice_client : public websocket_client * @param packet packet data * @param len length of packet * @param duration duration of opus packet + * @param send_now send this packet right away without buffering. + * Do NOT set send_now to true outside write_ready. 
*/ - void send(const char* packet, size_t len, uint64_t duration); + void send(const char* packet, size_t len, uint64_t duration, bool send_now = false); /** * @brief Queue a message to be sent via the websocket @@ -962,6 +976,10 @@ class DPP_EXPORT discord_voice_client : public websocket_client * @param duration Generally duration is 2.5, 5, 10, 20, 40 or 60 * if the timescale is 1000000 (1ms) * + * @param send_now Send this packet right away without buffering. + * This skips the duration calculation for the packet being sent + * and is only safe to set to true in write_ready. + * * @return discord_voice_client& Reference to self * * @note It is your responsibility to ensure that packets of data @@ -972,7 +990,7 @@ class DPP_EXPORT discord_voice_client : public websocket_client * * @throw dpp::voice_exception If data length is invalid or voice support not compiled into D++ */ - discord_voice_client& send_audio_opus(uint8_t* opus_packet, const size_t length, uint64_t duration); + discord_voice_client& send_audio_opus(const uint8_t* opus_packet, const size_t length, uint64_t duration, bool send_now = false); /** * @brief Send opus packets to the voice channel @@ -999,7 +1017,7 @@ class DPP_EXPORT discord_voice_client : public websocket_client * * @throw dpp::voice_exception If data length is invalid or voice support not compiled into D++ */ - discord_voice_client& send_audio_opus(uint8_t* opus_packet, const size_t length); + discord_voice_client& send_audio_opus(const uint8_t* opus_packet, const size_t length); /** * @brief Send silence to the voice channel @@ -1012,6 +1030,17 @@ class DPP_EXPORT discord_voice_client : public websocket_client */ discord_voice_client& send_silence(const uint64_t duration); + /** + * @brief Send stop frames to the voice channel. + * + * @param send_now send this packet right away without buffering. + * Do NOT set send_now to true outside write_ready. 
+ * + * @return discord_voice_client& Reference to self + * @throw dpp::voice_exception if voice support is not compiled into D++ + */ + discord_voice_client& send_stop_frames(bool send_now = false); + /** * @brief Sets the audio type that will be sent with send_audio_* methods. * diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index 1b8fe7b95f..d63cccc6a6 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -20,6 +20,7 @@ * ************************************************************************************/ +#include #ifdef _WIN32 #include #include @@ -148,6 +149,9 @@ bool discord_voice_client::is_end_to_end_encrypted() const { discord_voice_client& discord_voice_client::pause_audio(bool pause) { this->paused = pause; + if (!this->paused) { + this->sent_stop_frames = false; + } return *this; } @@ -176,6 +180,7 @@ discord_voice_client& discord_voice_client::stop_audio() { outbuf.clear(); track_meta.clear(); tracks = 0; + this->send_stop_frames(); return *this; } @@ -398,7 +403,6 @@ discord_voice_client& discord_voice_client::skip_to_next_marker() { } discord_voice_client& discord_voice_client::send_silence(const uint64_t duration) { - uint8_t silence_packet[3] = { 0xf8, 0xff, 0xfe }; send_audio_opus(silence_packet, 3, duration); return *this; } @@ -412,6 +416,7 @@ discord_voice_client& discord_voice_client::set_send_audio_type(send_audio_type_ discord_voice_client& discord_voice_client::speak() { if (!this->sending) { + std::cout << "Sending voice_opcode_client_speaking\n"; this->queue_message(json({ {"op", voice_opcode_client_speaking}, {"d", { @@ -443,4 +448,15 @@ uint16_t discord_voice_client::get_iteration_interval() { return this->iteration_interval; } +discord_voice_client& discord_voice_client::send_stop_frames(bool send_now) { + uint8_t silence_frames[sizeof(silence_packet) / sizeof(*silence_packet) * 5]; + for (size_t i = 0; i < sizeof(silence_frames) / sizeof(*silence_frames); i++) { + 
silence_frames[i] = silence_packet[i % 3]; + } + + this->send_audio_opus(silence_frames, sizeof(silence_frames) / sizeof(*silence_frames), 20, send_now); + + return *this; +} + } // namespace dpp diff --git a/src/dpp/voice/enabled/courier_loop.cpp b/src/dpp/voice/enabled/courier_loop.cpp index 6526c7f8ba..57b5e99652 100644 --- a/src/dpp/voice/enabled/courier_loop.cpp +++ b/src/dpp/voice/enabled/courier_loop.cpp @@ -76,7 +76,21 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour break; } - shared_state.signal_iteration.wait(lk); + shared_state.signal_iteration.wait(lk, [&shared_state](){ + /* + * Check the state we are actually waiting for instead of waking up + * every time read_ready is called. + */ + for (auto &[user_id, parking_lot]: shared_state.parked_voice_payloads) { + if (parking_lot.parked_payloads.empty()) { + continue; + } + + return true; + } + return false; + }); + /* * More data came or about to terminate, or just a spurious wake. * We need to collect the payloads again to determine what to do next. 
diff --git a/src/dpp/voice/enabled/opus.cpp b/src/dpp/voice/enabled/opus.cpp index d99a3776d0..a92886ac9c 100644 --- a/src/dpp/voice/enabled/opus.cpp +++ b/src/dpp/voice/enabled/opus.cpp @@ -71,14 +71,14 @@ discord_voice_client& discord_voice_client::send_audio_raw(uint16_t* audio_data, return *this; } -discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length) { +discord_voice_client& discord_voice_client::send_audio_opus(const uint8_t* opus_packet, const size_t length) { int samples = opus_packet_get_nb_samples(opus_packet, (opus_int32)length, opus_sample_rate_hz); uint64_t duration = (samples / 48) / (timescale / 1000000); - send_audio_opus(opus_packet, length, duration); + send_audio_opus(opus_packet, length, duration, false); return *this; } -discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length, uint64_t duration) { +discord_voice_client& discord_voice_client::send_audio_opus(const uint8_t* opus_packet, const size_t length, uint64_t duration, bool send_now) { int frame_size = (int)(48 * duration * (timescale / 1000000)); opus_int32 encoded_audio_max_length = (opus_int32)length; std::vector encoded_audio(encoded_audio_max_length); @@ -147,7 +147,7 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet /* Append the 4 byte nonce to the resulting payload */ std::memcpy(payload.data() + payload.size() - sizeof(noncel), &noncel, sizeof(noncel)); - this->send(reinterpret_cast(payload.data()), payload.size(), duration); + this->send(reinterpret_cast(payload.data()), payload.size(), duration, send_now); timestamp += frame_size; diff --git a/src/dpp/voice/enabled/read_write.cpp b/src/dpp/voice/enabled/read_write.cpp index c24200b49b..52a09d5a39 100644 --- a/src/dpp/voice/enabled/read_write.cpp +++ b/src/dpp/voice/enabled/read_write.cpp @@ -30,7 +30,7 @@ namespace dpp { dpp::socket discord_voice_client::want_write() { std::lock_guard 
lock(this->stream_mutex); - if (!this->paused && !outbuf.empty()) { + if (!this->sent_stop_frames && !outbuf.empty()) { return fd; } return INVALID_SOCKET; @@ -42,13 +42,16 @@ dpp::socket discord_voice_client::want_read() { } -void discord_voice_client::send(const char* packet, size_t len, uint64_t duration) { - voice_out_packet frame; - frame.packet.assign(packet, packet + len); - frame.duration = duration; - { +void discord_voice_client::send(const char* packet, size_t len, uint64_t duration, bool send_now) { + if (!send_now) [[likely]] { + voice_out_packet frame; + frame.packet.assign(packet, packet + len); + frame.duration = duration; + std::lock_guard lock(this->stream_mutex); outbuf.emplace_back(frame); + } else [[unlikely]] { + this->udp_send(packet, len); } } @@ -68,4 +71,4 @@ int discord_voice_client::udp_recv(char* data, size_t max_length) return static_cast(recv(this->fd, data, static_cast(max_length), 0)); } -} \ No newline at end of file +} diff --git a/src/dpp/voice/enabled/write_ready.cpp b/src/dpp/voice/enabled/write_ready.cpp index 46c0307055..3fda474f45 100644 --- a/src/dpp/voice/enabled/write_ready.cpp +++ b/src/dpp/voice/enabled/write_ready.cpp @@ -37,7 +37,14 @@ void discord_voice_client::write_ready() { send_audio_type_t type = satype_recorded_audio; { std::lock_guard lock(this->stream_mutex); - if (!this->paused && outbuf.size()) { + if (this->paused) { + if (!this->sent_stop_frames) { + this->send_stop_frames(true); + this->sent_stop_frames = true; + } + + /* Fallthrough if paused */ + } else if (outbuf.size()) { type = send_audio_type; if (outbuf[0].packet.size() == sizeof(uint16_t) && (*(reinterpret_cast(outbuf[0].packet.data()))) == AUDIO_TRACK_MARKER) { outbuf.erase(outbuf.begin());