From 1e9c32dfef83eab86ab88ded659f2321deeb06ac Mon Sep 17 00:00:00 2001
From: Dylan Martin <dylan@posthog.com>
Date: Tue, 27 Aug 2024 01:35:18 -0400
Subject: [PATCH] feat(capture): handle empty string UUIDs for `/batch`
 endpoint (#24586)

---
 rust/capture/src/v0_request.rs | 57 +++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/rust/capture/src/v0_request.rs b/rust/capture/src/v0_request.rs
index c0d5f36d3577f..ae0c80fece453 100644
--- a/rust/capture/src/v0_request.rs
+++ b/rust/capture/src/v0_request.rs
@@ -3,7 +3,7 @@ use std::io::prelude::*;
 
 use bytes::{Buf, Bytes};
 use flate2::read::GzDecoder;
-use serde::{Deserialize, Serialize};
+use serde::{Deserialize, Deserializer, Serialize};
 use serde_json::Value;
 use time::format_description::well_known::Iso8601;
 use time::OffsetDateTime;
@@ -56,6 +56,20 @@ pub struct EventFormData {
     pub data: String,
 }
 
+pub fn empty_string_is_none<'de, D>(deserializer: D) -> Result<Option<Uuid>, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let opt = Option::<String>::deserialize(deserializer)?;
+    match opt {
+        None => Ok(None),
+        Some(s) if s.is_empty() => Ok(None),
+        Some(s) => Uuid::parse_str(&s)
+            .map(Some)
+            .map_err(serde::de::Error::custom),
+    }
+}
+
 #[derive(Default, Debug, Deserialize, Serialize)]
 pub struct RawEvent {
     #[serde(
@@ -66,6 +80,7 @@ pub struct RawEvent {
     pub token: Option<String>,
     #[serde(alias = "$distinct_id", skip_serializing_if = "Option::is_none")]
     pub distinct_id: Option<Value>, // posthog-js accepts arbitrary values as distinct_id
+    #[serde(default, deserialize_with = "empty_string_is_none")]
     pub uuid: Option<Uuid>,
     pub event: String,
     #[serde(default)]
@@ -238,15 +253,30 @@ pub struct ProcessingContext {
 #[cfg(test)]
 mod tests {
     use crate::token::InvalidTokenReason;
+    use crate::v0_request::empty_string_is_none;
     use base64::Engine as _;
     use bytes::Bytes;
     use rand::distributions::Alphanumeric;
     use rand::Rng;
+    use serde::Deserialize;
     use serde_json::json;
+    use serde_json::Value;
+    use uuid::Uuid;
 
     use super::CaptureError;
     use super::RawRequest;
 
+    fn test_deserialize(json: Value) -> Result<Option<Uuid>, serde_json::Error> {
+        #[derive(Deserialize)]
+        struct TestStruct {
+            #[serde(deserialize_with = "empty_string_is_none")]
+            uuid: Option<Uuid>,
+        }
+
+        let result: TestStruct = serde_json::from_value(json)?;
+        Ok(result.uuid)
+    }
+
     #[test]
     fn decode_uncompressed_raw_event() {
         let base64_payload = "ewogICAgImRpc3RpbmN0X2lkIjogIm15X2lkMSIsCiAgICAiZXZlbnQiOiAibXlfZXZlbnQxIiwKICAgICJwcm9wZXJ0aWVzIjogewogICAgICAgICIkZGV2aWNlX3R5cGUiOiAiRGVza3RvcCIKICAgIH0sCiAgICAiYXBpX2tleSI6ICJteV90b2tlbjEiCn0K";
@@ -431,4 +461,29 @@ mod tests {
         assert_extracted_token(r#"{"event":"e","$token":"single_token"}"#, "single_token");
         assert_extracted_token(r#"{"event":"e","api_key":"single_token"}"#, "single_token");
     }
+
+    #[test]
+    fn test_empty_uuid_string_is_none() {
+        let json = serde_json::json!({"uuid": ""});
+        let result = test_deserialize(json);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), None);
+    }
+
+    #[test]
+    fn test_valid_uuid_is_some() {
+        let valid_uuid = "550e8400-e29b-41d4-a716-446655440000";
+        let json = serde_json::json!({"uuid": valid_uuid});
+        let result = test_deserialize(json);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), Some(Uuid::parse_str(valid_uuid).unwrap()));
+    }
+
+    #[test]
+    fn test_invalid_uuid_is_error() {
+        let invalid_uuid = "not-a-uuid";
+        let json = serde_json::json!({"uuid": invalid_uuid});
+        let result = test_deserialize(json);
+        assert!(result.is_err());
+    }
 }