Skip to content

Commit

Permalink
feat: redact (#504)
Browse files Browse the repository at this point in the history
  • Loading branch information
zxch3n authored Oct 10, 2024
1 parent 596374d commit f2365a8
Show file tree
Hide file tree
Showing 4 changed files with 547 additions and 30 deletions.
267 changes: 237 additions & 30 deletions crates/loro-internal/src/encoding/json_schema.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
use std::sync::Arc;

use either::Either;
use loro_common::{
ContainerID, ContainerType, CounterSpan, HasCounter, HasCounterSpan, IdLp, IdSpanVector,
LoroError, LoroResult, LoroValue, PeerID, TreeID, ID,
use super::{
outdated_encode_reordered::{import_changes_to_oplog, ImportChangesResult, ValueRegister},
ImportStatus,
};
use rle::{HasLength, RleVec, Sliceable};

use crate::{
arena::SharedArena,
change::Change,
Expand All @@ -21,12 +16,14 @@ use crate::{
version::Frontiers,
OpLog, VersionVector,
};

use super::{
outdated_encode_reordered::{import_changes_to_oplog, ImportChangesResult, ValueRegister},
ImportStatus,
};
use either::Either;
use json::{JsonOpContent, JsonSchema};
use loro_common::{
ContainerID, ContainerType, CounterSpan, HasCounter, HasCounterSpan, IdLp, IdSpanVector,
LoroError, LoroResult, LoroValue, PeerID, TreeID, ID,
};
use rle::{HasLength, RleVec, Sliceable};
use std::sync::Arc;

const SCHEMA_VERSION: u8 = 1;

Expand Down Expand Up @@ -229,9 +226,9 @@ fn encode_changes(
ContainerType::List => match content {
InnerContent::List(list) => JsonOpContent::List(match list {
InnerListOp::Insert { slice, pos } => {
let mut value =
let mut values =
arena.get_values(slice.0.start as usize..slice.0.end as usize);
value.iter_mut().for_each(|x| {
values.iter_mut().for_each(|x| {
if let LoroValue::Container(id) = x {
if id.is_normal() {
*id = register_container_id(id.clone(), peer_register);
Expand All @@ -240,7 +237,7 @@ fn encode_changes(
});
json::ListOp::Insert {
pos: *pos as u32,
value: value.into(),
value: values,
}
}
InnerListOp::Delete(DeleteSpanWithId {
Expand All @@ -258,9 +255,9 @@ fn encode_changes(
ContainerType::MovableList => match content {
InnerContent::List(list) => JsonOpContent::MovableList(match list {
InnerListOp::Insert { slice, pos } => {
let mut value =
let mut values =
arena.get_values(slice.0.start as usize..slice.0.end as usize);
value.iter_mut().for_each(|x| {
values.iter_mut().for_each(|x| {
if let LoroValue::Container(id) = x {
if id.is_normal() {
*id = register_container_id(id.clone(), peer_register);
Expand All @@ -269,7 +266,7 @@ fn encode_changes(
});
json::MovableListOp::Insert {
pos: *pos as u32,
value: value.into(),
value: values,
}
}
InnerListOp::Delete(DeleteSpanWithId {
Expand Down Expand Up @@ -537,9 +534,11 @@ fn decode_op(op: json::JsonOp, arena: &SharedArena, peers: &[PeerID]) -> LoroRes
},
ContainerType::List => match content {
JsonOpContent::List(list) => match list {
json::ListOp::Insert { pos, value } => {
let mut values = value.into_list().unwrap();
Arc::make_mut(&mut values).iter_mut().for_each(|v| {
json::ListOp::Insert {
pos,
value: mut values,
} => {
values.iter_mut().for_each(|v| {
if let LoroValue::Container(id) = v {
if id.is_normal() {
*id = convert_container_id(id.clone(), peers);
Expand All @@ -566,9 +565,11 @@ fn decode_op(op: json::JsonOp, arena: &SharedArena, peers: &[PeerID]) -> LoroRes
},
ContainerType::MovableList => match content {
JsonOpContent::MovableList(list) => match list {
json::MovableListOp::Insert { pos, value } => {
let mut values = value.into_list().unwrap();
Arc::make_mut(&mut values).iter_mut().for_each(|v| {
json::MovableListOp::Insert {
pos,
value: mut values,
} => {
values.iter_mut().for_each(|v| {
if let LoroValue::Container(id) = v {
if id.is_normal() {
*id = convert_container_id(id.clone(), peers);
Expand Down Expand Up @@ -717,12 +718,16 @@ impl TryFrom<String> for JsonSchema {
}

pub mod json {

use crate::{
encoding::OwnedValue,
version::{Frontiers, VersionRange},
};
use fractional_index::FractionalIndex;
use loro_common::{ContainerID, IdLp, Lamport, LoroValue, PeerID, TreeID, ID};
use loro_common::{ContainerID, Counter, IdLp, Lamport, LoroValue, PeerID, TreeID, ID};
use serde::{Deserialize, Serialize};
use std::ops::Range;

use crate::{encoding::OwnedValue, version::Frontiers};
use super::redact_value;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonSchema {
Expand All @@ -746,6 +751,13 @@ pub mod json {
pub ops: Vec<JsonOp>,
}

impl JsonChange {
pub fn op_len(&self) -> usize {
let last_op = self.ops.last().unwrap();
(last_op.counter - self.id.counter) as usize + last_op.content.op_len()
}
}

#[derive(Debug, Clone)]
pub struct JsonOp {
pub content: JsonOpContent,
Expand All @@ -765,6 +777,19 @@ pub mod json {
Future(FutureOpWrapper),
}

impl JsonOpContent {
pub fn op_len(&self) -> usize {
match self {
JsonOpContent::List(list_op) => list_op.op_len(),
JsonOpContent::MovableList(movable_list_op) => movable_list_op.op_len(),
JsonOpContent::Map(..) => 1,
JsonOpContent::Text(text_op) => text_op.op_len(),
JsonOpContent::Tree(..) => 1,
JsonOpContent::Future(..) => 1,
}
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FutureOpWrapper {
#[serde(flatten)]
Expand All @@ -777,7 +802,7 @@ pub mod json {
pub enum ListOp {
Insert {
pos: u32,
value: LoroValue,
value: Vec<LoroValue>,
},
Delete {
pos: i32,
Expand All @@ -787,12 +812,21 @@ pub mod json {
},
}

impl ListOp {
fn op_len(&self) -> usize {
match self {
ListOp::Insert { value: values, .. } => values.len(),
ListOp::Delete { len, .. } => (*len).unsigned_abs() as usize,
}
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum MovableListOp {
Insert {
pos: u32,
value: LoroValue,
value: Vec<LoroValue>,
},
Delete {
pos: i32,
Expand All @@ -813,6 +847,17 @@ pub mod json {
},
}

impl MovableListOp {
fn op_len(&self) -> usize {
match self {
MovableListOp::Insert { value: values, .. } => values.len(),
MovableListOp::Delete { len, .. } => (*len).unsigned_abs() as usize,
MovableListOp::Move { .. } => 1,
MovableListOp::Set { .. } => 1,
}
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum MapOp {
Expand Down Expand Up @@ -843,6 +888,17 @@ pub mod json {
MarkEnd,
}

impl TextOp {
fn op_len(&self) -> usize {
match self {
TextOp::Insert { text, .. } => text.chars().count(),
TextOp::Delete { len, .. } => len.unsigned_abs() as usize,
TextOp::Mark { .. } => 1,
TextOp::MarkEnd => 1,
}
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum TreeOp {
Expand Down Expand Up @@ -1178,6 +1234,157 @@ pub mod json {
}
}
}

#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum RedactError {
#[error("unknown operation type")]
UnknownOperationType,
}

/// Redacts sensitive content within the specified range by replacing it with default values.
///
/// This method applies the following redaction rules:
///
/// - Preserves delete and move operations without changes
/// - Replaces text insertion content with the Unicode replacement character (U+FFFD)
/// - Substitutes list and map insert values with `LoroValue::Null`
/// - Maintains child container creation operations
/// - Replaces text mark values with `LoroValue::Null`
/// - Preserves map insertion and text annotation keys
/// - Resets counter operations to zero
/// - Leaves unknown operation types (from future Loro versions) unchanged
///
/// This approach ensures sensitive data removal while preserving the document's overall
/// structure. Redacted documents maintain seamless collaboration capabilities with both
/// redacted and non-redacted versions.
pub fn redact(json: &mut JsonSchema, range: VersionRange) -> Result<(), RedactError> {
let peers = json.peers.clone();
let mut errors = Vec::new();
for change in json.changes.iter_mut() {
let real_peer = peers[change.id.peer as usize];
let real_id = ID::new(real_peer, change.id.counter);
if !range.has_overlap_with(real_id.to_span(change.op_len())) {
continue;
}

let redact_range = range.get(&real_peer).copied().unwrap();
for op in change.ops.iter_mut() {
if op.counter >= redact_range.1 {
break;
}

let len = op.content.op_len() as Counter;
if op.counter + len <= redact_range.0 {
continue;
}

let result = redact_op(
&mut op.content,
(redact_range.0 - op.counter).max(0).min(len)
..(redact_range.1 - op.counter).max(0).min(len),
);
match result {
Ok(()) => {}
Err(e) => errors.push(e),
}
}
}

if errors.is_empty() {
Ok(())
} else {
Err(errors.pop().unwrap())
}
}

fn redact_op(op: &mut JsonOpContent, range: Range<Counter>) -> Result<(), RedactError> {
match op {
JsonOpContent::List(list_op) => {
match list_op {
ListOp::Insert { value: values, .. } => {
for i in range {
redact_value(&mut values[i as usize]);
}
}
ListOp::Delete { .. } => {
// Delete op won't be changed
}
}
}
JsonOpContent::MovableList(movable_list_op) => {
match movable_list_op {
MovableListOp::Insert { value: values, .. } => {
for i in range {
redact_value(&mut values[i as usize]);
}
}
MovableListOp::Delete { .. } | MovableListOp::Move { .. } => {
// Delete and move ops won't be changed
}
MovableListOp::Set { value, .. } => {
assert!(range.start == 0 && range.len() == 1);
redact_value(value);
}
}
}
JsonOpContent::Map(map_op) => {
match map_op {
MapOp::Insert { value, .. } => {
assert!(range.start == 0 && range.len() == 1);
redact_value(value);
}
MapOp::Delete { .. } => {
// Delete op won't be changed
}
}
}
JsonOpContent::Text(text_op) => {
match text_op {
TextOp::Insert { text, .. } => {
let mut chars = vec![];
for (i, c) in text.chars().enumerate() {
if i < range.start as usize || i >= range.end as usize {
chars.push(c);
} else {
chars.push("� ".chars().next().unwrap());
}
}
*text = chars.into_iter().collect();
}
TextOp::Delete { .. } => {
// Delete op won't be changed
}
TextOp::Mark { style_value, .. } => {
assert!(range.start == 0 && range.len() == 1);
*style_value = LoroValue::Null;
}
TextOp::MarkEnd => {
// MarkEnd won't be changed
}
}
}
JsonOpContent::Tree(..) => {
// Creation of child container won't be changed
}
JsonOpContent::Future(future_op_wrapper) => match &mut future_op_wrapper.value {
FutureOp::Counter(owned_value) => {
*owned_value = OwnedValue::I64(0);
}
FutureOp::Unknown(..) => {
return Err(RedactError::UnknownOperationType);
}
},
}

Ok(())
}
}

fn redact_value(v: &mut LoroValue) {
match v {
LoroValue::Container(_) => {}
_ => *v = LoroValue::Null,
}
}

#[cfg(test)]
Expand Down
Loading

0 comments on commit f2365a8

Please sign in to comment.