From d742b71db6242a3a2c93620fd284ff7a900a3fcf Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Sat, 21 Sep 2024 13:22:01 +0700
Subject: [PATCH] feat: initial receive

---
 src/dpp/discordvoiceclient.cpp | 247 ++++++++++++++++-----------------
 1 file changed, 123 insertions(+), 124 deletions(-)

diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index 8b0dba0bca..63d56be01b 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -716,145 +716,144 @@ void discord_voice_client::read_ready()
 	uint8_t buffer[65535];
 	int packet_size = this->udp_recv((char*)buffer, sizeof(buffer));
 
-	if (packet_size > 0 && (!creator->on_voice_receive.empty() || !creator->on_voice_receive_combined.empty())) {
-		constexpr size_t header_size = 12;
-		if (static_cast<size_t>(packet_size) < header_size) {
-			/* Invalid RTP payload */
-			return;
-		}
+	bool receive_handler_is_empty = creator->on_voice_receive.empty() && creator->on_voice_receive_combined.empty();
+	if (packet_size <= 0 || receive_handler_is_empty) {
+		/* Nothing to do */
+		return;
+	}
 
-		/* It's a "silence packet" - throw it away. */
-		if (packet_size < 44) {
-			return;
-		}
+	constexpr size_t header_size = 12;
+	if (static_cast<size_t>(packet_size) < header_size) {
+		/* Invalid RTP payload */
+		return;
+	}
 
-		if (uint8_t payload_type = buffer[1] & 0b0111'1111;
-		    72 <= payload_type && payload_type <= 76) {
-			/*
-			 * This is an RTCP payload. Discord is known to send
-			 * RTCP Receiver Reports.
-			 *
-			 * See https://datatracker.ietf.org/doc/html/rfc3551#section-6
-			 */
-			return;
-		}
+	/* It's a "silence packet" - throw it away. */
+	if (packet_size < 44) {
+		return;
+	}
 
-		voice_payload vp{0, // seq, populate later
-		                 0, // timestamp, populate later
-		                 std::make_unique<voice_receive_t>(nullptr, std::string((char*)buffer, packet_size))};
+	if (uint8_t payload_type = buffer[1] & 0b0111'1111;
+		72 <= payload_type && payload_type <= 76) {
+		/*
+		 * This is an RTCP payload. Discord is known to send
+		 * RTCP Receiver Reports.
+		 *
+		 * See https://datatracker.ietf.org/doc/html/rfc3551#section-6
+		 */
+		return;
+	}
 
-		vp.vr->voice_client = this;
+	voice_payload vp{0, // seq, populate later
+		0, // timestamp, populate later
+		std::make_unique<voice_receive_t>(nullptr, std::string((char*)buffer, packet_size))};
 
-		{	/* Get the User ID of the speaker */
-			uint32_t speaker_ssrc;
-			std::memcpy(&speaker_ssrc, &buffer[8], sizeof(uint32_t));
-			speaker_ssrc = ntohl(speaker_ssrc);
-			vp.vr->user_id = ssrc_map[speaker_ssrc];
-		}
+	vp.vr->voice_client = this;
 
-		/* Get the sequence number of the voice UDP packet */
-		std::memcpy(&vp.seq, &buffer[2], sizeof(rtp_seq_t));
-		vp.seq = ntohs(vp.seq);
-		/* Get the timestamp of the voice UDP packet */
-		std::memcpy(&vp.timestamp, &buffer[4], sizeof(rtp_timestamp_t));
-		vp.timestamp = ntohl(vp.timestamp);
-
-		// nonce is 4 byte at the end of payload now
-		// change accordingly
-		// /* Nonce is the RTP Header with zero padding */
-		// uint8_t nonce[24] = { 0 };
-		// std::memcpy(nonce, buffer, header_size);
-
-		// /* Get the number of CSRC in header */
-		// const size_t csrc_count = buffer[0] & 0b0000'1111;
-		// /* Skip to the encrypted voice data */
-		// const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count;
-		// uint8_t* ciphertext = buffer + offset_to_data;
-		// const size_t ciphertext_len = packet_size - offset_to_data;
-
-		unsigned long long decrypted_len = 0;
-
-		// if (crypto_aead_xchacha20poly1305_ietf_decrypt(buffer, &decrypted_len,
-		// 						NULL,
-		// 						ciphertext, ciphertext_len,
-		// 						NULL,
-		// 						NULL,
-		// 						nonce, secret_key) != 0) {
-		// 		/* Invalid Discord RTP payload. */
-		// 		return;
-		// }
-
-		// if(crypto_aead_xchacha20poly1305_ietf_decrypt() != 0)
-
-		// 		if (crypto_secretbox_open_easy(encrypted_data, encrypted_data,
-		// 								encrypted_data_len, nonce, secret_key)) {
-		// 				/* Invalid Discord RTP payload. */
-		// 				return;
-		// 		}
-
-		// const uint8_t* decrypted_data = encrypted_data;
-		// size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES;
-		// if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) {
-		// 		/* Skip the RTP Extensions */
-		// 		size_t ext_len = 0;
-		// 		{
-		// 				uint16_t ext_len_in_words;
-		// 				memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t));
-		// 				ext_len_in_words = ntohs(ext_len_in_words);
-		// 				ext_len = sizeof(uint32_t) * ext_len_in_words;
-		// 		}
-		// 		constexpr size_t ext_header_len = sizeof(uint16_t) * 2;
-		// 		decrypted_data += ext_header_len + ext_len;
-		// 		decrypted_data_len -= ext_header_len + ext_len;
-		// }
+	{	/* Get the User ID of the speaker */
+		uint32_t speaker_ssrc;
+		std::memcpy(&speaker_ssrc, &buffer[8], sizeof(uint32_t));
+		speaker_ssrc = ntohl(speaker_ssrc);
+		vp.vr->user_id = ssrc_map[speaker_ssrc];
+	}
 
-		/*
-		 * We're left with the decrypted, opus-encoded data.
-		 * Park the payload and decode on the voice courier thread.
-		 */
-		vp.vr->audio_data.assign(buffer, buffer + decrypted_len);
+	/* Get the sequence number of the voice UDP packet */
+	std::memcpy(&vp.seq, &buffer[2], sizeof(rtp_seq_t));
+	vp.seq = ntohs(vp.seq);
+	/* Get the timestamp of the voice UDP packet */
+	std::memcpy(&vp.timestamp, &buffer[4], sizeof(rtp_timestamp_t));
+	vp.timestamp = ntohl(vp.timestamp);
+
+	constexpr size_t nonce_size = sizeof(uint32_t);
+	/* Nonce is the last 4 bytes of the packet, zero-padded to 24 bytes */
+	uint8_t nonce[24] = { 0 };
+	std::memcpy(nonce, buffer + packet_size - nonce_size, nonce_size);
+
+	/* Get the number of CSRC in header */
+	const size_t csrc_count = buffer[0] & 0b0000'1111;
+	/* Skip to the encrypted voice data */
+	const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count;
+	uint8_t* ciphertext = buffer + offset_to_data;
+	const size_t ciphertext_len = packet_size - offset_to_data - nonce_size;
+
+	uint8_t header[header_size] = { 0 };
+	memcpy(header, buffer, header_size);
+
+	unsigned long long decrypted_len = 0;
+	if (crypto_aead_xchacha20poly1305_ietf_decrypt(
+		buffer, &decrypted_len,
+		NULL,
+		ciphertext, ciphertext_len,
+		header,
+		header_size,
+		nonce, secret_key) != 0) {
+		/* Invalid Discord RTP payload. */
+		std::cout << "INVALID PACKET\n";
+		return;
+	}
 
-		{
-			std::lock_guard lk(voice_courier_shared_state.mtx);
-			auto& [range, payload_queue, pending_decoder_ctls, decoder] = voice_courier_shared_state.parked_voice_payloads[vp.vr->user_id];
+	// const uint8_t* decrypted_data = buffer;
+	// size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES;
+	// if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) {
+	// 		/* Skip the RTP Extensions */
+	// 		size_t ext_len = 0;
+	// 		{
+	// 				uint16_t ext_len_in_words;
+	// 				memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t));
+	// 				ext_len_in_words = ntohs(ext_len_in_words);
+	// 				ext_len = sizeof(uint32_t) * ext_len_in_words;
+	// 		}
+	// 		constexpr size_t ext_header_len = sizeof(uint16_t) * 2;
+	// 		decrypted_data += ext_header_len + ext_len;
+	// 		decrypted_data_len -= ext_header_len + ext_len;
+	// }
+
+	/*
+	 * We're left with the decrypted, opus-encoded data.
+	 * Park the payload and decode on the voice courier thread.
+	 */
+	vp.vr->audio_data.assign(buffer, buffer + decrypted_len);
 
-			if (!decoder) {
-				/*
-				 * Most likely this is the first time we encounter this speaker.
-				 * Do some initialization for not only the decoder but also the range.
+	{
+		std::lock_guard lk(voice_courier_shared_state.mtx);
+		auto& [range, payload_queue, pending_decoder_ctls, decoder] = voice_courier_shared_state.parked_voice_payloads[vp.vr->user_id];
+
+		if (!decoder) {
+			/*
+			 * Most likely this is the first time we encounter this speaker.
+			 * Do some initialization for not only the decoder but also the range.
+			 */
+			range.min_seq = vp.seq;
+			range.min_timestamp = vp.timestamp;
+
+			int opus_error = 0;
+			decoder.reset(opus_decoder_create(opus_sample_rate_hz, opus_channel_count, &opus_error),
+				 &opus_decoder_destroy);
+			if (opus_error) {
+				/**
+				 * NOTE: The -10 here makes the opus_error match up with values of exception_error_code,
+				 * which would otherwise conflict as every C library loves to use values from -1 downwards.
 				 */
-				range.min_seq = vp.seq;
-				range.min_timestamp = vp.timestamp;
-
-				int opus_error = 0;
-				decoder.reset(opus_decoder_create(opus_sample_rate_hz, opus_channel_count, &opus_error),
-				              &opus_decoder_destroy);
-				if (opus_error) {
-					/**
-					 * NOTE: The -10 here makes the opus_error match up with values of exception_error_code,
-					 * which would otherwise conflict as every C library loves to use values from -1 downwards.
-					 */
-					throw dpp::voice_exception((exception_error_code)(opus_error - 10), "discord_voice_client::discord_voice_client; opus_decoder_create() failed");
-				}
+				throw dpp::voice_exception((exception_error_code)(opus_error - 10), "discord_voice_client::discord_voice_client; opus_decoder_create() failed");
 			}
+		}
 
-			if (vp.seq < range.min_seq && vp.timestamp < range.min_timestamp) {
-				/* This packet arrived too late. We can only discard it. */
-				return;
-			}
-			range.max_seq = vp.seq;
-			range.max_timestamp = vp.timestamp;
-			payload_queue.push(std::move(vp));
+		if (vp.seq < range.min_seq && vp.timestamp < range.min_timestamp) {
+			/* This packet arrived too late. We can only discard it. */
+			return;
 		}
+		range.max_seq = vp.seq;
+		range.max_timestamp = vp.timestamp;
+		payload_queue.push(std::move(vp));
+	}
 
-		voice_courier_shared_state.signal_iteration.notify_one();
+	voice_courier_shared_state.signal_iteration.notify_one();
 
-		if (!voice_courier.joinable()) {
-			/* Courier thread is not running, start it */
-			voice_courier = std::thread(&voice_courier_loop,
-			                            std::ref(*this),
-			                            std::ref(voice_courier_shared_state));
-		}
+	if (!voice_courier.joinable()) {
+		/* Courier thread is not running, start it */
+		voice_courier = std::thread(&voice_courier_loop,
+							  std::ref(*this),
+							  std::ref(voice_courier_shared_state));
 	}
 #else
 	throw dpp::voice_exception(err_no_voice_support, "Voice support not enabled in this build of D++");