Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: redact #504

Merged
merged 1 commit into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
267 changes: 237 additions & 30 deletions crates/loro-internal/src/encoding/json_schema.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
use std::sync::Arc;

use either::Either;
use loro_common::{
ContainerID, ContainerType, CounterSpan, HasCounter, HasCounterSpan, IdLp, IdSpanVector,
LoroError, LoroResult, LoroValue, PeerID, TreeID, ID,
use super::{
outdated_encode_reordered::{import_changes_to_oplog, ImportChangesResult, ValueRegister},
ImportStatus,
};
use rle::{HasLength, RleVec, Sliceable};

use crate::{
arena::SharedArena,
change::Change,
Expand All @@ -21,12 +16,14 @@ use crate::{
version::Frontiers,
OpLog, VersionVector,
};

use super::{
outdated_encode_reordered::{import_changes_to_oplog, ImportChangesResult, ValueRegister},
ImportStatus,
};
use either::Either;
use json::{JsonOpContent, JsonSchema};
use loro_common::{
ContainerID, ContainerType, CounterSpan, HasCounter, HasCounterSpan, IdLp, IdSpanVector,
LoroError, LoroResult, LoroValue, PeerID, TreeID, ID,
};
use rle::{HasLength, RleVec, Sliceable};
use std::sync::Arc;

const SCHEMA_VERSION: u8 = 1;

Expand Down Expand Up @@ -229,9 +226,9 @@ fn encode_changes(
ContainerType::List => match content {
InnerContent::List(list) => JsonOpContent::List(match list {
InnerListOp::Insert { slice, pos } => {
let mut value =
let mut values =
arena.get_values(slice.0.start as usize..slice.0.end as usize);
value.iter_mut().for_each(|x| {
values.iter_mut().for_each(|x| {
if let LoroValue::Container(id) = x {
if id.is_normal() {
*id = register_container_id(id.clone(), peer_register);
Expand All @@ -240,7 +237,7 @@ fn encode_changes(
});
json::ListOp::Insert {
pos: *pos as u32,
value: value.into(),
value: values,
}
}
InnerListOp::Delete(DeleteSpanWithId {
Expand All @@ -258,9 +255,9 @@ fn encode_changes(
ContainerType::MovableList => match content {
InnerContent::List(list) => JsonOpContent::MovableList(match list {
InnerListOp::Insert { slice, pos } => {
let mut value =
let mut values =
arena.get_values(slice.0.start as usize..slice.0.end as usize);
value.iter_mut().for_each(|x| {
values.iter_mut().for_each(|x| {
if let LoroValue::Container(id) = x {
if id.is_normal() {
*id = register_container_id(id.clone(), peer_register);
Expand All @@ -269,7 +266,7 @@ fn encode_changes(
});
json::MovableListOp::Insert {
pos: *pos as u32,
value: value.into(),
value: values,
}
}
InnerListOp::Delete(DeleteSpanWithId {
Expand Down Expand Up @@ -537,9 +534,11 @@ fn decode_op(op: json::JsonOp, arena: &SharedArena, peers: &[PeerID]) -> LoroRes
},
ContainerType::List => match content {
JsonOpContent::List(list) => match list {
json::ListOp::Insert { pos, value } => {
let mut values = value.into_list().unwrap();
Arc::make_mut(&mut values).iter_mut().for_each(|v| {
json::ListOp::Insert {
pos,
value: mut values,
} => {
values.iter_mut().for_each(|v| {
if let LoroValue::Container(id) = v {
if id.is_normal() {
*id = convert_container_id(id.clone(), peers);
Expand All @@ -566,9 +565,11 @@ fn decode_op(op: json::JsonOp, arena: &SharedArena, peers: &[PeerID]) -> LoroRes
},
ContainerType::MovableList => match content {
JsonOpContent::MovableList(list) => match list {
json::MovableListOp::Insert { pos, value } => {
let mut values = value.into_list().unwrap();
Arc::make_mut(&mut values).iter_mut().for_each(|v| {
json::MovableListOp::Insert {
pos,
value: mut values,
} => {
values.iter_mut().for_each(|v| {
if let LoroValue::Container(id) = v {
if id.is_normal() {
*id = convert_container_id(id.clone(), peers);
Expand Down Expand Up @@ -717,12 +718,16 @@ impl TryFrom<String> for JsonSchema {
}

pub mod json {

use crate::{
encoding::OwnedValue,
version::{Frontiers, VersionRange},
};
use fractional_index::FractionalIndex;
use loro_common::{ContainerID, IdLp, Lamport, LoroValue, PeerID, TreeID, ID};
use loro_common::{ContainerID, Counter, IdLp, Lamport, LoroValue, PeerID, TreeID, ID};
use serde::{Deserialize, Serialize};
use std::ops::Range;

use crate::{encoding::OwnedValue, version::Frontiers};
use super::redact_value;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonSchema {
Expand All @@ -746,6 +751,13 @@ pub mod json {
pub ops: Vec<JsonOp>,
}

impl JsonChange {
    /// Returns the number of op counters covered by this change.
    ///
    /// The length is the distance from the change's first counter (`id.counter`) to the
    /// counter of its last op, plus the length of that last op.
    ///
    /// A change without any ops has length `0`. Returning zero instead of panicking keeps
    /// callers safe when they process hand-written or truncated JSON.
    pub fn op_len(&self) -> usize {
        self.ops.last().map_or(0, |last_op| {
            (last_op.counter - self.id.counter) as usize + last_op.content.op_len()
        })
    }
}

#[derive(Debug, Clone)]
pub struct JsonOp {
pub content: JsonOpContent,
Expand All @@ -765,6 +777,19 @@ pub mod json {
Future(FutureOpWrapper),
}

impl JsonOpContent {
pub fn op_len(&self) -> usize {
match self {
JsonOpContent::List(list_op) => list_op.op_len(),
JsonOpContent::MovableList(movable_list_op) => movable_list_op.op_len(),
JsonOpContent::Map(..) => 1,
JsonOpContent::Text(text_op) => text_op.op_len(),
JsonOpContent::Tree(..) => 1,
JsonOpContent::Future(..) => 1,
}
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FutureOpWrapper {
#[serde(flatten)]
Expand All @@ -777,7 +802,7 @@ pub mod json {
pub enum ListOp {
Insert {
pos: u32,
value: LoroValue,
value: Vec<LoroValue>,
},
Delete {
pos: i32,
Expand All @@ -787,12 +812,21 @@ pub mod json {
},
}

impl ListOp {
fn op_len(&self) -> usize {
match self {
ListOp::Insert { value: values, .. } => values.len(),
ListOp::Delete { len, .. } => (*len).unsigned_abs() as usize,
}
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum MovableListOp {
Insert {
pos: u32,
value: LoroValue,
value: Vec<LoroValue>,
},
Delete {
pos: i32,
Expand All @@ -813,6 +847,17 @@ pub mod json {
},
}

impl MovableListOp {
fn op_len(&self) -> usize {
match self {
MovableListOp::Insert { value: values, .. } => values.len(),
MovableListOp::Delete { len, .. } => (*len).unsigned_abs() as usize,
MovableListOp::Move { .. } => 1,
MovableListOp::Set { .. } => 1,
}
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum MapOp {
Expand Down Expand Up @@ -843,6 +888,17 @@ pub mod json {
MarkEnd,
}

impl TextOp {
fn op_len(&self) -> usize {
match self {
TextOp::Insert { text, .. } => text.chars().count(),
TextOp::Delete { len, .. } => len.unsigned_abs() as usize,
TextOp::Mark { .. } => 1,
TextOp::MarkEnd => 1,
}
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum TreeOp {
Expand Down Expand Up @@ -1178,6 +1234,157 @@ pub mod json {
}
}
}

/// Error reported by [`redact`] when it meets content it cannot redact.
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum RedactError {
    /// An op inside the redaction range has a type this version does not know
    /// (`FutureOp::Unknown`), so its content could not be redacted.
    #[error("unknown operation type")]
    UnknownOperationType,
}

/// Removes sensitive content from the ops of `json` that fall inside `range`, replacing it
/// with neutral placeholder values.
///
/// Redaction rules:
///
/// - Delete and move operations are kept as they are
/// - Inserted text is replaced by the Unicode replacement character (U+FFFD)
/// - Values inserted into lists and maps become `LoroValue::Null`
/// - Operations that create child containers are kept
/// - Text mark values become `LoroValue::Null`
/// - Map keys and text annotation keys are kept
/// - Counter operations are reset to zero
/// - Operations of unknown type (from future Loro versions) are left unchanged
///
/// The structure of the document is preserved while the sensitive data is removed, so a
/// redacted document can keep collaborating with both redacted and non-redacted versions.
///
/// # Errors
///
/// Returns [`RedactError::UnknownOperationType`] if an op of unknown type lies inside
/// `range`. All other ops in the range are still redacted before the error is returned.
///
/// # Panics
///
/// Panics if a change refers to a peer index that is not present in `json.peers`.
pub fn redact(json: &mut JsonSchema, range: VersionRange) -> Result<(), RedactError> {
    let peers = json.peers.clone();
    // Only the most recent failure is reported, so remembering a single error is enough.
    let mut last_error = None;

    for change in json.changes.iter_mut() {
        // `change.id.peer` is an index into the peer table, not the real peer id.
        let real_peer = peers[change.id.peer as usize];
        let change_span = ID::new(real_peer, change.id.counter).to_span(change.op_len());
        if !range.has_overlap_with(change_span) {
            continue;
        }

        let bounds = range.get(&real_peer).copied().unwrap();
        let range_start = bounds.0;
        let range_end = bounds.1;

        for op in change.ops.iter_mut() {
            // Stop at the first op that starts at or after the end of the range; this
            // relies on the ops of a change being ordered by counter.
            if op.counter >= range_end {
                break;
            }

            let len = op.content.op_len() as Counter;
            // The op ends before the range starts: nothing to redact here.
            if op.counter + len <= range_start {
                continue;
            }

            // Translate the range into offsets relative to the op, clamped to `0..=len`.
            let from = (range_start - op.counter).max(0).min(len);
            let to = (range_end - op.counter).max(0).min(len);
            if let Err(e) = redact_op(&mut op.content, from..to) {
                last_error = Some(e);
            }
        }
    }

    match last_error {
        Some(e) => Err(e),
        None => Ok(()),
    }
}

fn redact_op(op: &mut JsonOpContent, range: Range<Counter>) -> Result<(), RedactError> {
match op {
JsonOpContent::List(list_op) => {
match list_op {
ListOp::Insert { value: values, .. } => {
for i in range {
redact_value(&mut values[i as usize]);
}
}
ListOp::Delete { .. } => {
// Delete op won't be changed
}
}
}
JsonOpContent::MovableList(movable_list_op) => {
match movable_list_op {
MovableListOp::Insert { value: values, .. } => {
for i in range {
redact_value(&mut values[i as usize]);
}
}
MovableListOp::Delete { .. } | MovableListOp::Move { .. } => {
// Delete and move ops won't be changed
}
MovableListOp::Set { value, .. } => {
assert!(range.start == 0 && range.len() == 1);
redact_value(value);
}
}
}
JsonOpContent::Map(map_op) => {
match map_op {
MapOp::Insert { value, .. } => {
assert!(range.start == 0 && range.len() == 1);
redact_value(value);
}
MapOp::Delete { .. } => {
// Delete op won't be changed
}
}
}
JsonOpContent::Text(text_op) => {
match text_op {
TextOp::Insert { text, .. } => {
let mut chars = vec![];
for (i, c) in text.chars().enumerate() {
if i < range.start as usize || i >= range.end as usize {
chars.push(c);
} else {
chars.push("� ".chars().next().unwrap());
}
}
*text = chars.into_iter().collect();
}
TextOp::Delete { .. } => {
// Delete op won't be changed
}
TextOp::Mark { style_value, .. } => {
assert!(range.start == 0 && range.len() == 1);
*style_value = LoroValue::Null;
}
TextOp::MarkEnd => {
// MarkEnd won't be changed
}
}
}
JsonOpContent::Tree(..) => {
// Creation of child container won't be changed
}
JsonOpContent::Future(future_op_wrapper) => match &mut future_op_wrapper.value {
FutureOp::Counter(owned_value) => {
*owned_value = OwnedValue::I64(0);
}
FutureOp::Unknown(..) => {
return Err(RedactError::UnknownOperationType);
}
},
}

Ok(())
}
}

/// Replaces `v` with `LoroValue::Null`, unless it is a container reference, which is kept
/// as it is.
fn redact_value(v: &mut LoroValue) {
    if !matches!(v, LoroValue::Container(_)) {
        *v = LoroValue::Null;
    }
}

#[cfg(test)]
Expand Down
Loading
Loading