Skip to content

Commit

Permalink
basic doc comments
Browse files Browse the repository at this point in the history
  • Loading branch information
djellemah committed Apr 27, 2024
1 parent 9578abe commit 871bd80
Show file tree
Hide file tree
Showing 13 changed files with 91 additions and 40 deletions.
2 changes: 2 additions & 0 deletions src/channel.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! Send parse events across a channel, to decouple parsing from handling.
use crate::parser::JsonEvents;
use crate::jsonpath::JsonPath;
use crate::sender::Sender;
Expand Down
2 changes: 1 addition & 1 deletion src/fn_snd.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/// Minimal implementation for a Sender to have a function which receives the events.
//! Minimal implementation for a Sender to have a function which receives the events.
use crate::sender::Sender;

// This is a lot of machinery just to call a function :-\
Expand Down
33 changes: 20 additions & 13 deletions src/handler.rs
Original file line number Diff line number Diff line change
@@ -1,35 +1,42 @@
/*!
This traverses/handles the incoming json events from the streaming parser.
*/
use crate::parser::JsonEvents;
use crate::sender::Sender;
use crate::sender::Event;
use crate::jsonpath::*;

use json_event_parser::JsonEvent;

// This traverses/handles the incoming json stream events.
//
// Really just becomes a place to hang match_path and maybe_send_value without
// threading those functions through the JsonEvent handlers. Effectively it's a
// visitor with accept = match_path and visit = maybe_send_value
/**
The Handler trait.
A place to hang `match_path` and `maybe_send_value` without
threading those functions through the JsonEvent handlers.
Effectively it's a
visitor with accept = match_path and visit = maybe_send_value
*/
pub trait Handler {
// value contained by Event
/// value contained by Event
// Lifetime bound is so that events are allowed the shortest lifetime possible,
// hence the where clauses and higher-ranked for declarations in the below trait methods.
type V<'l> where Self : 'l;

// TODO this is optional?
fn match_path(&self, path : &JsonPath) -> bool;

// This will be called for each leaf value, along with its path.
/// This will be called for each leaf value, along with its path.
fn maybe_send_value<'a, Snd>(&self, path : &JsonPath, ev : &JsonEvent, tx : &mut Snd)
-> Result<(),<Snd as Sender<Event<<Self as Handler>::V<'_>>>>::SendError>
// the `for` is critical here because 'x must have a longer lifetime than 'a but a shorter lifetime than 'l
where Snd : for <'x> Sender<Event<Self::V<'x>>>
;

// Handle all arrays.
// values will be emitted via maybe_send_value
// nested arrays are recursive
// objects are sent to object(...)
/// Handle all arrays.
/// values will be emitted via maybe_send_value
/// nested arrays are recursive
/// objects are sent to object(...)
// TODO why is depth here? It's duplicated in the parents path.
fn array<'a, Snd>(&self, jevs : &mut JsonEvents, parents : JsonPath, depth : u64, tx : &mut Snd )
-> Result<(),<Snd as Sender<Event<<Self as Handler>::V<'_>>>>::SendError>
Expand Down Expand Up @@ -64,8 +71,7 @@ pub trait Handler {
Ok(())
}

// handle objects.
//
/// handle objects.
fn object<'a, Snd>(&self, jevs : &mut JsonEvents, parents : JsonPath, depth : u64, tx : &mut Snd )
-> Result<(),<Snd as Sender<Event<<Self as Handler>::V<'_>>>>::SendError>
where Snd : for <'x> Sender<Event<Self::V<'x>>>
Expand Down Expand Up @@ -96,6 +102,7 @@ pub trait Handler {
Ok(())
}

/// Handle String Number Boolean Null (ie non-recursive)
fn value<'a,Snd>(&self, jevs : &mut JsonEvents, parents : JsonPath, depth : u64, tx : &mut Snd)
-> Result<(),<Snd as Sender<Event<<Self as Handler>::V<'_>>>>::SendError>
where Snd : for <'x> Sender<Event<Self::V<'x>>>
Expand Down
14 changes: 8 additions & 6 deletions src/jsonpath.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
/// This is a json path, ie an ordered set of steps,
/// where each step is either a key name or an index.
/// It must be optimised for add/remove the last element,
/// and cloning should be cheap.
///
/// rpds::Vector meets those requirements.
/*!
This is a json path, ie an ordered set of steps,
where each step is either a key name or an index.
It must be optimised for add/remove the last element,
and cloning should be cheap.
`rpds::Vector` meets those requirements.
*/

/// The type for Index elements of a json path.
///
Expand Down
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ pub mod schema;
pub mod valuer;
pub mod channel;
pub mod fn_snd;

/// The idea here was something like ruby's ARGF, ie stdin and then all command line args that are files.
/// But currently it only handles either stdin or a single file.
pub fn make_readable<S>(maybe_readable_args : &[S]) -> Box<dyn std::io::BufRead>
where S : AsRef<str> + std::convert::AsRef<std::path::Path> + std::fmt::Debug
{
Expand Down
3 changes: 3 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ use cln::fn_snd;

use std::process::exit;

/// The most useful thing this does is calculate a Schema for a json file. Really fast.
/// The rest of it is a showcase and testbed for some of the other things that can be done.
fn main() {
// Quick'n'Dirty command line arg dispatch
let args : Vec<String> = std::env::args().collect();
Expand All @@ -21,6 +23,7 @@ fn main() {
let mut jevstream = parser::JsonEvents::new(istream);
schema::schema(&mut jevstream);
}
// These are proofs of concept to check that the rest of the handlers and visitors work.
["-p", rst @ ..] => {
let istream = cln::make_readable(rst);
let mut jevstream = parser::JsonEvents::new(istream);
Expand Down
4 changes: 4 additions & 0 deletions src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
/*!
The interface to the parser, currently json-event-parser.
*/

/// Newtype around a `countio::Counter` that wraps a boxed `std::io::BufRead`.
// NOTE(review): presumably this exists so the number of bytes consumed by the
// parser can be tracked — confirm against the `countio` docs and the callers.
pub struct JsonCounter(countio::Counter<Box<dyn std::io::BufRead>>);

impl std::io::Read for JsonCounter {
Expand Down
4 changes: 4 additions & 0 deletions src/plain.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
/*!
A really simple visitor that always prints out everything.
*/

use crate::handler::Handler;
use crate::jsonpath::JsonPath;
use crate::sender::Event;
Expand Down
20 changes: 20 additions & 0 deletions src/schema.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
/*!
One way to view a tree is a map of path => value, or for its schema path => type.
This parses a json document and collects the type for each path, including some basic statistics.
*/

use std::cell::RefCell;

use crate::parser::JsonEvents;
Expand All @@ -6,6 +12,20 @@ use crate::sender::Sender;
use crate::jsonpath::JsonPath;
use crate::sender::Event;

/*
tree is a map of path => [(type, count)]
NOTE this already sort of exists in serde_json with feature arbitrary_precision
enum N {
PosInt(u64),
/// Always less than zero.
NegInt(i64),
/// Always finite.
Float(f64),
}
*/

/// The various kinds of json number, in numeric format.
#[derive(Debug,Clone)]
pub enum NumberType {
// max
Expand Down
10 changes: 6 additions & 4 deletions src/sender.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// The Sender trait.
//
// The Handler will ultimately send this event to an implementation of Sender.
/*!
The Sender trait.
The Handler will ultimately send this event to an implementation of Sender.
*/

use crate::sendpath::SendPath;

Expand All @@ -15,7 +17,7 @@ pub enum Event<V> {
Error(String),
}

// This can be anything from a function call to a channel.
/// This can be implemented by anything from a function call to a channel.
pub trait Sender<Event> {
type SendError;
fn send<'a>(&mut self, ev: Box<Event>) -> Result<(), Self::SendError>;
Expand Down
10 changes: 6 additions & 4 deletions src/sendpath.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Basically this implements a JsonPath that's optimised for sending over a
// channel without excessive copying.

/*!
This implements a JsonPath that's optimised for sending over a
channel without excessive copying.
*/
use crate::jsonpath::JsonPath;
use crate::jsonpath::Step;

Expand All @@ -19,7 +20,8 @@ mod like_jsonpath {
}
}

// a tree path as sent by the streaming parser to a handler of some kind, along with its leaf value.
/// A tree path optimised for sending, which means it cannot, in general, keep references.
///
/// The steps of the path are stored in a `Vec<Step>`, exposed as the public tuple field.
// TODO implement a reference for sending to functions and other non-channels.
#[derive(Debug,Clone)]
pub struct SendPath(pub Vec<Step>);

Expand Down
24 changes: 13 additions & 11 deletions src/shredder.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Write each leaf value to a separate file for its path.
// a la the Shredder algorithm in Dremel paper.
// TODO implement the Repetition and Definition Levels
/*!
This writes out a file for each path, where indexes are removed from the path.
Each file contains all the values from that path, in order.
*/

use crate::parser;
use crate::handler::Handler;
use crate::jsonpath::*;
Expand Down Expand Up @@ -53,13 +55,13 @@ impl<V> ShredWriter<V>
dir.join(filename)
}

// find or create a given file for the jsonpath
//
// Self keeps a hashmap of
//
// PathBuf => File
//
// so it doesn't repeatedly reopen the same files.
/// find or create a given file for the jsonpath
///
/// Self keeps a hashmap of
///
/// PathBuf => File
///
/// so it doesn't repeatedly reopen the same files.
fn find_or_create<'a>(&'a mut self, send_path : &crate::sendpath::SendPath) -> &'a std::fs::File {
let filename = Self::filename_of_path(&self.dir, send_path, &self.ext);
if self.files.contains_key(&filename) {
Expand Down Expand Up @@ -120,7 +122,7 @@ impl MsgPacker {
match ev {
&String(v) => {
match rmp::encode::write_str(&mut buf, &v) {
Ok(()) => Event::Value(SendPath::from(path),buf),
Ok(()) => Event::Value(SendPath::from(path), buf),
Err(err) => panic!("msgpack error {err}"),
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/valuer.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/// Converts incoming JsonEvents to serde_json::Value
//! Converts incoming JsonEvents to serde_json::Value
use crate::sendpath::SendPath;
use crate::handler::Handler;
Expand Down

0 comments on commit 871bd80

Please sign in to comment.