From d742b71db6242a3a2c93620fd284ff7a900a3fcf Mon Sep 17 00:00:00 2001 From: Neko-Life Date: Sat, 21 Sep 2024 13:22:01 +0700 Subject: [PATCH] feat: initial receive --- src/dpp/discordvoiceclient.cpp | 247 ++++++++++++++++----------------- 1 file changed, 123 insertions(+), 124 deletions(-) diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index 8b0dba0bca..63d56be01b 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -716,145 +716,144 @@ void discord_voice_client::read_ready() uint8_t buffer[65535]; int packet_size = this->udp_recv((char*)buffer, sizeof(buffer)); - if (packet_size > 0 && (!creator->on_voice_receive.empty() || !creator->on_voice_receive_combined.empty())) { - constexpr size_t header_size = 12; - if (static_cast(packet_size) < header_size) { - /* Invalid RTP payload */ - return; - } + bool receive_handler_is_empty = creator->on_voice_receive.empty() && creator->on_voice_receive_combined.empty(); + if (packet_size <= 0 || receive_handler_is_empty) { + /* Nothing to do */ + return; + } - /* It's a "silence packet" - throw it away. */ - if (packet_size < 44) { - return; - } + constexpr size_t header_size = 12; + if (static_cast(packet_size) < header_size) { + /* Invalid RTP payload */ + return; + } - if (uint8_t payload_type = buffer[1] & 0b0111'1111; - 72 <= payload_type && payload_type <= 76) { - /* - * This is an RTCP payload. Discord is known to send - * RTCP Receiver Reports. - * - * See https://datatracker.ietf.org/doc/html/rfc3551#section-6 - */ - return; - } + /* It's a "silence packet" - throw it away. */ + if (packet_size < 44) { + return; + } - voice_payload vp{0, // seq, populate later - 0, // timestamp, populate later - std::make_unique(nullptr, std::string((char*)buffer, packet_size))}; + if (uint8_t payload_type = buffer[1] & 0b0111'1111; + 72 <= payload_type && payload_type <= 76) { + /* + * This is an RTCP payload. Discord is known to send + * RTCP Receiver Reports. + * + * See https://datatracker.ietf.org/doc/html/rfc3551#section-6 + */ + return; + } - vp.vr->voice_client = this; + voice_payload vp{0, // seq, populate later + 0, // timestamp, populate later + std::make_unique(nullptr, std::string((char*)buffer, packet_size))}; - { /* Get the User ID of the speaker */ - uint32_t speaker_ssrc; - std::memcpy(&speaker_ssrc, &buffer[8], sizeof(uint32_t)); - speaker_ssrc = ntohl(speaker_ssrc); - vp.vr->user_id = ssrc_map[speaker_ssrc]; - } + vp.vr->voice_client = this; - /* Get the sequence number of the voice UDP packet */ - std::memcpy(&vp.seq, &buffer[2], sizeof(rtp_seq_t)); - vp.seq = ntohs(vp.seq); - /* Get the timestamp of the voice UDP packet */ - std::memcpy(&vp.timestamp, &buffer[4], sizeof(rtp_timestamp_t)); - vp.timestamp = ntohl(vp.timestamp); - - // nonce is 4 byte at the end of payload now - // change accordingly - // /* Nonce is the RTP Header with zero padding */ - // uint8_t nonce[24] = { 0 }; - // std::memcpy(nonce, buffer, header_size); - - // /* Get the number of CSRC in header */ - // const size_t csrc_count = buffer[0] & 0b0000'1111; - // /* Skip to the encrypted voice data */ - // const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count; - // uint8_t* ciphertext = buffer + offset_to_data; - // const size_t ciphertext_len = packet_size - offset_to_data; - - unsigned long long decrypted_len = 0; - - // if (crypto_aead_xchacha20poly1305_ietf_decrypt(buffer, &decrypted_len, - // NULL, - // ciphertext, ciphertext_len, - // NULL, - // NULL, - // nonce, secret_key) != 0) { - // /* Invalid Discord RTP payload. */ - // return; - // } - - // if(crypto_aead_xchacha20poly1305_ietf_decrypt() != 0) - - // if (crypto_secretbox_open_easy(encrypted_data, encrypted_data, - // encrypted_data_len, nonce, secret_key)) { - // /* Invalid Discord RTP payload. */ - // return; - // } - - // const uint8_t* decrypted_data = encrypted_data; - // size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES; - // if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) { - // /* Skip the RTP Extensions */ - // size_t ext_len = 0; - // { - // uint16_t ext_len_in_words; - // memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t)); - // ext_len_in_words = ntohs(ext_len_in_words); - // ext_len = sizeof(uint32_t) * ext_len_in_words; - // } - // constexpr size_t ext_header_len = sizeof(uint16_t) * 2; - // decrypted_data += ext_header_len + ext_len; - // decrypted_data_len -= ext_header_len + ext_len; - // } + { /* Get the User ID of the speaker */ + uint32_t speaker_ssrc; + std::memcpy(&speaker_ssrc, &buffer[8], sizeof(uint32_t)); + speaker_ssrc = ntohl(speaker_ssrc); + vp.vr->user_id = ssrc_map[speaker_ssrc]; + } - /* - * We're left with the decrypted, opus-encoded data. - * Park the payload and decode on the voice courier thread. - */ - vp.vr->audio_data.assign(buffer, buffer + decrypted_len); + /* Get the sequence number of the voice UDP packet */ + std::memcpy(&vp.seq, &buffer[2], sizeof(rtp_seq_t)); + vp.seq = ntohs(vp.seq); + /* Get the timestamp of the voice UDP packet */ + std::memcpy(&vp.timestamp, &buffer[4], sizeof(rtp_timestamp_t)); + vp.timestamp = ntohl(vp.timestamp); + + constexpr size_t nonce_size = sizeof(uint32_t); + /* Nonce is 4 byte at the end of payload with zero padding */ + uint8_t nonce[24] = { 0 }; + std::memcpy(nonce, buffer + packet_size - nonce_size, nonce_size); + + /* Get the number of CSRC in header */ + const size_t csrc_count = buffer[0] & 0b0000'1111; + /* Skip to the encrypted voice data */ + const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count; + uint8_t* ciphertext = buffer + offset_to_data; + const size_t ciphertext_len = packet_size - offset_to_data - nonce_size; + + uint8_t header[header_size] = { 0 }; + memcpy(header, buffer, header_size); + + unsigned long long decrypted_len = 0; + if (crypto_aead_xchacha20poly1305_ietf_decrypt( + buffer, &decrypted_len, + NULL, + ciphertext, ciphertext_len, + header, + header_size, + nonce, secret_key) != 0) { + /* Invalid Discord RTP payload. */ + std::cout << "INVALID PACKET\n"; + return; + } - { - std::lock_guard lk(voice_courier_shared_state.mtx); - auto& [range, payload_queue, pending_decoder_ctls, decoder] = voice_courier_shared_state.parked_voice_payloads[vp.vr->user_id]; + // const uint8_t* decrypted_data = buffer; + // size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES; + // if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) { + // /* Skip the RTP Extensions */ + // size_t ext_len = 0; + // { + // uint16_t ext_len_in_words; + // memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t)); + // ext_len_in_words = ntohs(ext_len_in_words); + // ext_len = sizeof(uint32_t) * ext_len_in_words; + // } + // constexpr size_t ext_header_len = sizeof(uint16_t) * 2; + // decrypted_data += ext_header_len + ext_len; + // decrypted_data_len -= ext_header_len + ext_len; + // } + + /* + * We're left with the decrypted, opus-encoded data. + * Park the payload and decode on the voice courier thread. + */ + vp.vr->audio_data.assign(buffer, buffer + decrypted_len); - if (!decoder) { - /* - * Most likely this is the first time we encounter this speaker. - * Do some initialization for not only the decoder but also the range. + { + std::lock_guard lk(voice_courier_shared_state.mtx); + auto& [range, payload_queue, pending_decoder_ctls, decoder] = voice_courier_shared_state.parked_voice_payloads[vp.vr->user_id]; + + if (!decoder) { + /* + * Most likely this is the first time we encounter this speaker. + * Do some initialization for not only the decoder but also the range. + */ + range.min_seq = vp.seq; + range.min_timestamp = vp.timestamp; + + int opus_error = 0; + decoder.reset(opus_decoder_create(opus_sample_rate_hz, opus_channel_count, &opus_error), + &opus_decoder_destroy); + if (opus_error) { + /** + * NOTE: The -10 here makes the opus_error match up with values of exception_error_code, + * which would otherwise conflict as every C library loves to use values from -1 downwards. */ - range.min_seq = vp.seq; - range.min_timestamp = vp.timestamp; - - int opus_error = 0; - decoder.reset(opus_decoder_create(opus_sample_rate_hz, opus_channel_count, &opus_error), - &opus_decoder_destroy); - if (opus_error) { - /** - * NOTE: The -10 here makes the opus_error match up with values of exception_error_code, - * which would otherwise conflict as every C library loves to use values from -1 downwards. - */ - throw dpp::voice_exception((exception_error_code)(opus_error - 10), "discord_voice_client::discord_voice_client; opus_decoder_create() failed"); - } + throw dpp::voice_exception((exception_error_code)(opus_error - 10), "discord_voice_client::discord_voice_client; opus_decoder_create() failed"); } + } - if (vp.seq < range.min_seq && vp.timestamp < range.min_timestamp) { - /* This packet arrived too late. We can only discard it. */ - return; - } - range.max_seq = vp.seq; - range.max_timestamp = vp.timestamp; - payload_queue.push(std::move(vp)); + if (vp.seq < range.min_seq && vp.timestamp < range.min_timestamp) { + /* This packet arrived too late. We can only discard it. */ + return; } + range.max_seq = vp.seq; + range.max_timestamp = vp.timestamp; + payload_queue.push(std::move(vp)); + } - voice_courier_shared_state.signal_iteration.notify_one(); + voice_courier_shared_state.signal_iteration.notify_one(); - if (!voice_courier.joinable()) { - /* Courier thread is not running, start it */ - voice_courier = std::thread(&voice_courier_loop, - std::ref(*this), - std::ref(voice_courier_shared_state)); - } + if (!voice_courier.joinable()) { + /* Courier thread is not running, start it */ + voice_courier = std::thread(&voice_courier_loop, + std::ref(*this), + std::ref(voice_courier_shared_state)); } #else throw dpp::voice_exception(err_no_voice_support, "Voice support not enabled in this build of D++");