diff --git a/crates/loro-internal/src/encoding/json_schema.rs b/crates/loro-internal/src/encoding/json_schema.rs index b7b37c841..3e3b3e1ee 100644 --- a/crates/loro-internal/src/encoding/json_schema.rs +++ b/crates/loro-internal/src/encoding/json_schema.rs @@ -1,12 +1,7 @@ -use std::sync::Arc; - -use either::Either; -use loro_common::{ - ContainerID, ContainerType, CounterSpan, HasCounter, HasCounterSpan, IdLp, IdSpanVector, - LoroError, LoroResult, LoroValue, PeerID, TreeID, ID, +use super::{ + outdated_encode_reordered::{import_changes_to_oplog, ImportChangesResult, ValueRegister}, + ImportStatus, }; -use rle::{HasLength, RleVec, Sliceable}; - use crate::{ arena::SharedArena, change::Change, @@ -21,12 +16,14 @@ use crate::{ version::Frontiers, OpLog, VersionVector, }; - -use super::{ - outdated_encode_reordered::{import_changes_to_oplog, ImportChangesResult, ValueRegister}, - ImportStatus, -}; +use either::Either; use json::{JsonOpContent, JsonSchema}; +use loro_common::{ + ContainerID, ContainerType, CounterSpan, HasCounter, HasCounterSpan, IdLp, IdSpanVector, + LoroError, LoroResult, LoroValue, PeerID, TreeID, ID, +}; +use rle::{HasLength, RleVec, Sliceable}; +use std::sync::Arc; const SCHEMA_VERSION: u8 = 1; @@ -229,9 +226,9 @@ fn encode_changes( ContainerType::List => match content { InnerContent::List(list) => JsonOpContent::List(match list { InnerListOp::Insert { slice, pos } => { - let mut value = + let mut values = arena.get_values(slice.0.start as usize..slice.0.end as usize); - value.iter_mut().for_each(|x| { + values.iter_mut().for_each(|x| { if let LoroValue::Container(id) = x { if id.is_normal() { *id = register_container_id(id.clone(), peer_register); @@ -240,7 +237,7 @@ fn encode_changes( }); json::ListOp::Insert { pos: *pos as u32, - value: value.into(), + value: values, } } InnerListOp::Delete(DeleteSpanWithId { @@ -258,9 +255,9 @@ fn encode_changes( ContainerType::MovableList => match content { InnerContent::List(list) => JsonOpContent::MovableList(match list { InnerListOp::Insert { slice, pos } => { - let mut value = + let mut values = arena.get_values(slice.0.start as usize..slice.0.end as usize); - value.iter_mut().for_each(|x| { + values.iter_mut().for_each(|x| { if let LoroValue::Container(id) = x { if id.is_normal() { *id = register_container_id(id.clone(), peer_register); @@ -269,7 +266,7 @@ fn encode_changes( }); json::MovableListOp::Insert { pos: *pos as u32, - value: value.into(), + value: values, } } InnerListOp::Delete(DeleteSpanWithId { @@ -537,9 +534,11 @@ fn decode_op(op: json::JsonOp, arena: &SharedArena, peers: &[PeerID]) -> LoroRes }, ContainerType::List => match content { JsonOpContent::List(list) => match list { - json::ListOp::Insert { pos, value } => { - let mut values = value.into_list().unwrap(); - Arc::make_mut(&mut values).iter_mut().for_each(|v| { + json::ListOp::Insert { + pos, + value: mut values, + } => { + values.iter_mut().for_each(|v| { if let LoroValue::Container(id) = v { if id.is_normal() { *id = convert_container_id(id.clone(), peers); @@ -566,9 +565,11 @@ fn decode_op(op: json::JsonOp, arena: &SharedArena, peers: &[PeerID]) -> LoroRes }, ContainerType::MovableList => match content { JsonOpContent::MovableList(list) => match list { - json::MovableListOp::Insert { pos, value } => { - let mut values = value.into_list().unwrap(); - Arc::make_mut(&mut values).iter_mut().for_each(|v| { + json::MovableListOp::Insert { + pos, + value: mut values, + } => { + values.iter_mut().for_each(|v| { if let LoroValue::Container(id) = v { if id.is_normal() { *id = convert_container_id(id.clone(), peers); @@ -717,12 +718,16 @@ impl TryFrom for JsonSchema { } pub mod json { - + use crate::{ + encoding::OwnedValue, + version::{Frontiers, VersionRange}, + }; use fractional_index::FractionalIndex; - use loro_common::{ContainerID, IdLp, Lamport, LoroValue, PeerID, TreeID, ID}; + use loro_common::{ContainerID, Counter, IdLp, Lamport, LoroValue, PeerID, TreeID, ID}; use serde::{Deserialize, Serialize}; + use std::ops::Range; - use crate::{encoding::OwnedValue, version::Frontiers}; + use super::redact_value; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct JsonSchema { @@ -746,6 +751,13 @@ pub mod json { pub ops: Vec, } + impl JsonChange { + pub fn op_len(&self) -> usize { + let last_op = self.ops.last().unwrap(); + (last_op.counter - self.id.counter) as usize + last_op.content.op_len() + } + } + #[derive(Debug, Clone)] pub struct JsonOp { pub content: JsonOpContent, @@ -765,6 +777,19 @@ pub mod json { Future(FutureOpWrapper), } + impl JsonOpContent { + pub fn op_len(&self) -> usize { + match self { + JsonOpContent::List(list_op) => list_op.op_len(), + JsonOpContent::MovableList(movable_list_op) => movable_list_op.op_len(), + JsonOpContent::Map(..) => 1, + JsonOpContent::Text(text_op) => text_op.op_len(), + JsonOpContent::Tree(..) => 1, + JsonOpContent::Future(..) => 1, + } + } + } + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FutureOpWrapper { #[serde(flatten)] @@ -777,7 +802,7 @@ pub mod json { pub enum ListOp { Insert { pos: u32, - value: LoroValue, + value: Vec, }, Delete { pos: i32, @@ -787,12 +812,21 @@ pub mod json { }, } + impl ListOp { + fn op_len(&self) -> usize { + match self { + ListOp::Insert { value: values, .. } => values.len(), + ListOp::Delete { len, .. } => (*len).unsigned_abs() as usize, + } + } + } + #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] pub enum MovableListOp { Insert { pos: u32, - value: LoroValue, + value: Vec, }, Delete { pos: i32, @@ -813,6 +847,17 @@ pub mod json { }, } + impl MovableListOp { + fn op_len(&self) -> usize { + match self { + MovableListOp::Insert { value: values, .. } => values.len(), + MovableListOp::Delete { len, .. } => (*len).unsigned_abs() as usize, + MovableListOp::Move { .. } => 1, + MovableListOp::Set { .. } => 1, + } + } + } + #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] pub enum MapOp { @@ -843,6 +888,17 @@ pub mod json { MarkEnd, } + impl TextOp { + fn op_len(&self) -> usize { + match self { + TextOp::Insert { text, .. } => text.chars().count(), + TextOp::Delete { len, .. } => len.unsigned_abs() as usize, + TextOp::Mark { .. } => 1, + TextOp::MarkEnd => 1, + } + } + } + #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] pub enum TreeOp { @@ -1178,6 +1234,157 @@ pub mod json { } } } + + #[derive(thiserror::Error, Debug, PartialEq, Eq)] + pub enum RedactError { + #[error("unknown operation type")] + UnknownOperationType, + } + + /// Redacts sensitive content within the specified range by replacing it with default values. + /// + /// This method applies the following redaction rules: + /// + /// - Preserves delete and move operations without changes + /// - Replaces text insertion content with the Unicode replacement character (U+FFFD) + /// - Substitutes list and map insert values with `LoroValue::Null` + /// - Maintains child container creation operations + /// - Replaces text mark values with `LoroValue::Null` + /// - Preserves map insertion and text annotation keys + /// - Resets counter operations to zero + /// - Leaves unknown operation types (from future Loro versions) unchanged + /// + /// This approach ensures sensitive data removal while preserving the document's overall + /// structure. Redacted documents maintain seamless collaboration capabilities with both + /// redacted and non-redacted versions. + pub fn redact(json: &mut JsonSchema, range: VersionRange) -> Result<(), RedactError> { + let peers = json.peers.clone(); + let mut errors = Vec::new(); + for change in json.changes.iter_mut() { + let real_peer = peers[change.id.peer as usize]; + let real_id = ID::new(real_peer, change.id.counter); + if !range.has_overlap_with(real_id.to_span(change.op_len())) { + continue; + } + + let redact_range = range.get(&real_peer).copied().unwrap(); + for op in change.ops.iter_mut() { + if op.counter >= redact_range.1 { + break; + } + + let len = op.content.op_len() as Counter; + if op.counter + len <= redact_range.0 { + continue; + } + + let result = redact_op( + &mut op.content, + (redact_range.0 - op.counter).max(0).min(len) + ..(redact_range.1 - op.counter).max(0).min(len), + ); + match result { + Ok(()) => {} + Err(e) => errors.push(e), + } + } + } + + if errors.is_empty() { + Ok(()) + } else { + Err(errors.pop().unwrap()) + } + } + + fn redact_op(op: &mut JsonOpContent, range: Range) -> Result<(), RedactError> { + match op { + JsonOpContent::List(list_op) => { + match list_op { + ListOp::Insert { value: values, .. } => { + for i in range { + redact_value(&mut values[i as usize]); + } + } + ListOp::Delete { .. } => { + // Delete op won't be changed + } + } + } + JsonOpContent::MovableList(movable_list_op) => { + match movable_list_op { + MovableListOp::Insert { value: values, .. } => { + for i in range { + redact_value(&mut values[i as usize]); + } + } + MovableListOp::Delete { .. } | MovableListOp::Move { .. } => { + // Delete and move ops won't be changed + } + MovableListOp::Set { value, .. } => { + assert!(range.start == 0 && range.len() == 1); + redact_value(value); + } + } + } + JsonOpContent::Map(map_op) => { + match map_op { + MapOp::Insert { value, .. } => { + assert!(range.start == 0 && range.len() == 1); + redact_value(value); + } + MapOp::Delete { .. } => { + // Delete op won't be changed + } + } + } + JsonOpContent::Text(text_op) => { + match text_op { + TextOp::Insert { text, .. } => { + let mut chars = vec![]; + for (i, c) in text.chars().enumerate() { + if i < range.start as usize || i >= range.end as usize { + chars.push(c); + } else { + chars.push("� ".chars().next().unwrap()); + } + } + *text = chars.into_iter().collect(); + } + TextOp::Delete { .. } => { + // Delete op won't be changed + } + TextOp::Mark { style_value, .. } => { + assert!(range.start == 0 && range.len() == 1); + *style_value = LoroValue::Null; + } + TextOp::MarkEnd => { + // MarkEnd won't be changed + } + } + } + JsonOpContent::Tree(..) => { + // Creation of child container won't be changed + } + JsonOpContent::Future(future_op_wrapper) => match &mut future_op_wrapper.value { + FutureOp::Counter(owned_value) => { + *owned_value = OwnedValue::I64(0); + } + FutureOp::Unknown(..) => { + return Err(RedactError::UnknownOperationType); + } + }, + } + + Ok(()) + } +} + +fn redact_value(v: &mut LoroValue) { + match v { + LoroValue::Container(_) => {} + _ => *v = LoroValue::Null, + } } #[cfg(test)] diff --git a/crates/loro-internal/src/version.rs b/crates/loro-internal/src/version.rs index 2d192eab8..539587993 100644 --- a/crates/loro-internal/src/version.rs +++ b/crates/loro-internal/src/version.rs @@ -73,6 +73,23 @@ impl VersionRange { true } + pub fn has_overlap_with(&self, mut span: IdSpan) -> bool { + span.normalize_(); + if let Some((start, end)) = self.get(&span.peer) { + start < &span.counter.end && end > &span.counter.start + } else { + false + } + } + + pub fn contains_id(&self, id: ID) -> bool { + if let Some((start, end)) = self.get(&id.peer) { + start <= &id.counter && end > &id.counter + } else { + false + } + } + pub fn contains_id_span(&self, mut span: IdSpan) -> bool { span.normalize_(); if let Some((start, end)) = self.get(&span.peer) { diff --git a/crates/loro/tests/integration_test/mod.rs b/crates/loro/tests/integration_test/mod.rs index 38728af6b..dab2e1bf0 100644 --- a/crates/loro/tests/integration_test/mod.rs +++ b/crates/loro/tests/integration_test/mod.rs @@ -3,6 +3,7 @@ use loro::LoroDoc; mod detached_editing_test; #[cfg(feature = "jsonpath")] mod jsonpath_test; +mod redact_test; mod shallow_snapshot_test; mod snapshot_at_test; mod text_update_test; diff --git a/crates/loro/tests/integration_test/redact_test.rs b/crates/loro/tests/integration_test/redact_test.rs new file mode 100644 index 000000000..95239af8c --- /dev/null +++ b/crates/loro/tests/integration_test/redact_test.rs @@ -0,0 +1,292 @@ +use loro::json::redact; +use loro::{LoroDoc, LoroList, LoroMovableList, LoroTree, LoroValue}; +use loro_internal::version::VersionRange; + +#[test] +fn redact_text_doc() { + let doc = LoroDoc::new(); + doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + // |-----------------------| <- 24 ops + text.insert(0, "Hello, world! This is a secret message.") + .unwrap(); + + let mut json = doc.export_json_updates(&Default::default(), &doc.oplog_vv()); + let mut range = VersionRange::new(); + range.insert(1, 24, 30); + redact(&mut json, range).unwrap(); + let redacted_doc = LoroDoc::new(); + redacted_doc.import_json_updates(json).unwrap(); + let redacted_text = redacted_doc.get_text("text"); + assert_eq!( + redacted_text.to_string(), + "Hello, world! This is a ������ message." + ); + assert_ne!(text.to_string(), redacted_text.to_string()); +} + +#[test] +fn redact_map_list_insertions() { + let doc = LoroDoc::new(); + doc.set_peer_id(1).unwrap(); + let map = doc.get_map("map"); + let list = doc.get_list("list"); + + // Insert into map + map.insert("key1", "sensitive data").unwrap(); + map.insert("key2", 42).unwrap(); + + // Insert into list + list.insert(0, "secret info").unwrap(); + list.insert(1, true).unwrap(); + + let mut json = doc.export_json_updates(&Default::default(), &doc.oplog_vv()); + let mut range = VersionRange::new(); + range.insert(1, 0, 5); // Redact all operations + redact(&mut json, range).unwrap(); + + let s = serde_json::to_string_pretty(&json).unwrap(); + let expected = r#"{ + "schema_version": 1, + "start_version": {}, + "peers": [ + "1" + ], + "changes": [ + { + "id": "0@0", + "timestamp": 0, + "deps": [], + "lamport": 0, + "msg": null, + "ops": [ + { + "container": "cid:root-map:Map", + "content": { + "type": "insert", + "key": "key1", + "value": null + }, + "counter": 0 + }, + { + "container": "cid:root-map:Map", + "content": { + "type": "insert", + "key": "key2", + "value": null + }, + "counter": 1 + }, + { + "container": "cid:root-list:List", + "content": { + "type": "insert", + "pos": 0, + "value": [ + null, + null + ] + }, + "counter": 2 + } + ] + } + ] +}"#; + assert_eq!(s, expected); + let redacted_doc = LoroDoc::new(); + redacted_doc.import_json_updates(json).unwrap(); + + let redacted_map = redacted_doc.get_map("map"); + let redacted_list = redacted_doc.get_list("list"); + + // Check map values + assert_eq!( + redacted_map.get("key1").unwrap().into_value().unwrap(), + LoroValue::Null + ); + assert_eq!( + redacted_map.get("key2").unwrap().into_value().unwrap(), + LoroValue::Null + ); + + // Check list values + assert_eq!( + redacted_list.get(0).unwrap().into_value().unwrap(), + LoroValue::Null + ); + assert_eq!( + redacted_list.get(1).unwrap().into_value().unwrap(), + LoroValue::Null + ); +} + +#[test] +fn redact_movable_list() { + let doc = LoroDoc::new(); + doc.set_peer_id(1).unwrap(); + let list = doc.get_movable_list("movable_list"); + list.insert(0, "sensitive data 1").unwrap(); + list.insert(1, "sensitive data 2").unwrap(); + list.set(0, "updated sensitive data").unwrap(); + + let mut json = doc.export_json_updates(&Default::default(), &doc.oplog_vv()); + let mut range = VersionRange::new(); + range.insert(1, 0, 3); + redact(&mut json, range).unwrap(); + let redacted_json = serde_json::to_string_pretty(&json).unwrap(); + assert_eq!( + redacted_json, + r#"{ + "schema_version": 1, + "start_version": {}, + "peers": [ + "1" + ], + "changes": [ + { + "id": "0@0", + "timestamp": 0, + "deps": [], + "lamport": 0, + "msg": null, + "ops": [ + { + "container": "cid:root-movable_list:MovableList", + "content": { + "type": "insert", + "pos": 0, + "value": [ + null, + null + ] + }, + "counter": 0 + }, + { + "container": "cid:root-movable_list:MovableList", + "content": { + "type": "set", + "elem_id": "L0@0", + "value": null + }, + "counter": 2 + } + ] + } + ] +}"# + ); + + // Create a new document from the redacted JSON + let redacted_doc = LoroDoc::new(); + redacted_doc.import_json_updates(&redacted_json).unwrap(); + + let redacted_list = redacted_doc.get_movable_list("movable_list"); + + // Check that the insert operations were redacted + assert_eq!( + redacted_list.get(0).unwrap().into_value().unwrap(), + LoroValue::Null + ); + assert_eq!( + redacted_list.get(1).unwrap().into_value().unwrap(), + LoroValue::Null + ); + + // Check that the set operation was redacted + // The set operation should have replaced the first null value with another null + assert_eq!( + redacted_list.get(0).unwrap().into_value().unwrap(), + LoroValue::Null + ); + + // Verify the list length is correct + assert_eq!(redacted_list.len(), 2); + + // Optionally, you can print the redacted JSON to inspect it + // println!("{}", redacted_json); +} + +#[test] +fn redact_should_keep_parent_child_relationship() { + let doc = LoroDoc::new(); + doc.set_peer_id(1).unwrap(); + let map = doc.get_map("map"); + let list = map.insert_container("list", LoroList::new()).unwrap(); + let tree = list.insert_container(0, LoroTree::new()).unwrap(); + let node = tree.create(None).unwrap(); + let sub_map = tree.get_meta(node).unwrap(); + let _m = sub_map + .insert_container("ll", LoroMovableList::new()) + .unwrap(); + let mut json = doc.export_json_updates(&Default::default(), &doc.oplog_vv()); + let mut range = VersionRange::new(); + range.insert(1, 0, 100); + redact(&mut json, range).unwrap(); + let redacted_json = serde_json::to_string_pretty(&json).unwrap(); + pretty_assertions::assert_eq!( + redacted_json, + r#"{ + "schema_version": 1, + "start_version": {}, + "peers": [ + "1" + ], + "changes": [ + { + "id": "0@0", + "timestamp": 0, + "deps": [], + "lamport": 0, + "msg": null, + "ops": [ + { + "container": "cid:root-map:Map", + "content": { + "type": "insert", + "key": "list", + "value": "🦜:cid:0@0:List" + }, + "counter": 0 + }, + { + "container": "cid:0@0:List", + "content": { + "type": "insert", + "pos": 0, + "value": [ + "🦜:cid:1@0:Tree" + ] + }, + "counter": 1 + }, + { + "container": "cid:1@0:Tree", + "content": { + "type": "create", + "target": "2@0", + "parent": null, + "fractional_index": "80" + }, + "counter": 2 + }, + { + "container": "cid:2@0:Map", + "content": { + "type": "insert", + "key": "ll", + "value": "🦜:cid:3@0:MovableList" + }, + "counter": 3 + } + ] + } + ] +}"# + ); + let new_doc = LoroDoc::new(); + new_doc.import_json_updates(&redacted_json).unwrap(); + assert_eq!(new_doc.get_deep_value(), doc.get_deep_value()); +}