-
Notifications
You must be signed in to change notification settings - Fork 36
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Evaluation of template macros (#674)
This PR enables the evaluation of user-defined template macros. At a high level, the changes involved include: * Introducing a `TemplateCompiler` that reads TDL and produces an in-memory representation that can be efficiently evaluated. * Consolidating the e-expression evaluator and the template macro evaluator into a unified `MacroEvaluator`. * Moving `SystemReader` functionality into `ExpandingReader` to consolidate (temporary) uses of `unsafe`. A later PR will merge these types. (See #675.) * Removing the `MacroInvocation` trait in favor of the `MacroExpr` enum. * Renaming `RawMacroInvocation` to `RawEExpression`, which is more accurate. * Renaming `LazyDecoder::RawMacroInvocation to `_::EExpression`. * Removing 'data lifetime from <'top, 'data, D> signatures. * Removing 'data lifetime from the `LazyDecoder` trait. * Caching nested expressions during parsing. Regarding expression caching: The text reader performs parsing in two phases: first, it matches values and expressions. Upon request, it can then read the expression it matched. Prior to this PR, structural information detected during the matching phase would be discarded. For example, when the reader would match a list, it would also match all of the list's child expressions in the process. However, the child expressions would not be stored; when the application began reading the list, each one would need to be matched again. This redundant matching effort would also happen for the fields of a struct or the arguments of an e-expression. The `TextEncoding_1_1` decoder now caches child and argument expressions in the bump allocator, which makes iterating over the container or e-expression "free" during the reading phase. As part of this change, the lifetime associated with `TextBufferView` was reduced from `'data` to `'top`. Readers now hold a `&[u8]` directly and construct a transient `TextBufferView` at read time. This allows the `TextBufferView` to hold a reference to the bump allocator, making the allocator available in all of the parsing methods without having to manually plumb it into each method.
- Loading branch information
Showing
45 changed files
with
5,153 additions
and
2,308 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
use criterion::{black_box, criterion_group, criterion_main, Criterion}; | ||
use ion_rs::lazy::decoder::LazyDecoder; | ||
use ion_rs::lazy::encoding::TextEncoding_1_1; | ||
use ion_rs::lazy::r#struct::LazyStruct; | ||
use ion_rs::lazy::reader::{LazyApplicationReader, LazyTextReader_1_1}; | ||
use ion_rs::lazy::value::LazyValue; | ||
use ion_rs::lazy::value_ref::ValueRef; | ||
use ion_rs::{Element, Format, IonResult, TextKind}; | ||
use ion_rs::{ElementReader, IonData}; | ||
|
||
fn rewrite_as_compact_text(pretty_ion: &str) -> IonResult<String> { | ||
let values = Element::read_all(pretty_ion).unwrap(); | ||
let mut buffer = Vec::new(); | ||
Element::write_all_as(&values, Format::Text(TextKind::Compact), &mut buffer)?; | ||
Ok(String::from_utf8(buffer).unwrap()) | ||
} | ||
|
||
pub fn criterion_benchmark(c: &mut Criterion) { | ||
const NUM_VALUES: usize = 10_000; | ||
let pretty_data_1_0 = r#"{ | ||
'timestamp': 1670446800245, | ||
'threadId': 418, | ||
'threadName': "scheduler-thread-6", | ||
'loggerName': "com.example.organization.product.component.ClassName", | ||
'logLevel': INFO, | ||
'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}", | ||
'parameters': ["SUCCESS","example-client-1","aws-us-east-5f-18b4fa","region 4","2022-12-07T20:59:59.744000Z",], | ||
}"#.repeat(NUM_VALUES); | ||
let data_1_0 = rewrite_as_compact_text(&pretty_data_1_0).unwrap(); | ||
let template_text = r#" | ||
(macro event (timestamp thread_id thread_name client_num host_id parameters) | ||
{ | ||
'timestamp': timestamp, | ||
'threadId': thread_id, | ||
'threadName': (make_string "scheduler-thread-" thread_name), | ||
'loggerName': "com.example.organization.product.component.ClassName", | ||
'logLevel': (quote INFO), | ||
'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}", | ||
'parameters': [ | ||
"SUCCESS", | ||
(make_string "example-client-" client_num), | ||
(make_string "aws-us-east-5f-" host_id), | ||
parameters | ||
] | ||
} | ||
) | ||
"#; | ||
|
||
let data_1_1 = r#"(:event 1670446800245 418 "6" "1" "18b4fa" (:values "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(NUM_VALUES); | ||
|
||
println!("Ion 1.0 data size: {} bytes", data_1_0.len()); | ||
println!("Ion 1.1 data size: {} bytes", data_1_1.len()); | ||
|
||
// As a sanity check, materialize the data from both the Ion 1.0 and 1.1 streams and make sure | ||
// that they are equivalent before we start measuring the time needed to read them. | ||
let seq_1_0 = LazyTextReader_1_1::new(data_1_0.as_bytes()) | ||
.unwrap() | ||
.read_all_elements() | ||
.unwrap(); | ||
let mut reader_1_1 = LazyTextReader_1_1::new(data_1_1.as_bytes()).unwrap(); | ||
reader_1_1.register_template(template_text).unwrap(); | ||
let seq_1_1 = reader_1_1.read_all_elements().unwrap(); | ||
assert!( | ||
IonData::eq(&seq_1_0, &seq_1_1), | ||
"Ion 1.0 sequence was not equal to the Ion 1.1 sequence" | ||
); | ||
|
||
fn count_value_and_children<D: LazyDecoder>(lazy_value: &LazyValue<'_, D>) -> IonResult<usize> { | ||
use ValueRef::*; | ||
let child_count = match lazy_value.read()? { | ||
List(s) => count_sequence_children(s.iter())?, | ||
SExp(s) => count_sequence_children(s.iter())?, | ||
Struct(s) => count_struct_children(&s)?, | ||
scalar => { | ||
let _ = black_box(scalar); | ||
0 | ||
} | ||
}; | ||
Ok(1 + child_count) | ||
} | ||
|
||
fn count_sequence_children<'a, D: LazyDecoder>( | ||
lazy_sequence: impl Iterator<Item = IonResult<LazyValue<'a, D>>>, | ||
) -> IonResult<usize> { | ||
let mut count = 0; | ||
for value in lazy_sequence { | ||
count += count_value_and_children(&value?)?; | ||
} | ||
Ok(count) | ||
} | ||
|
||
fn count_struct_children<D: LazyDecoder>(lazy_struct: &LazyStruct<'_, D>) -> IonResult<usize> { | ||
let mut count = 0; | ||
for field in lazy_struct { | ||
count += count_value_and_children(&field?.value())?; | ||
} | ||
Ok(count) | ||
} | ||
|
||
c.bench_function("text 1.0: scan all", |b| { | ||
b.iter(|| { | ||
let mut reader = | ||
LazyApplicationReader::<'_, TextEncoding_1_1>::new(data_1_0.as_bytes()).unwrap(); | ||
while let Some(item) = reader.next().unwrap() { | ||
black_box(item); | ||
} | ||
}) | ||
}); | ||
c.bench_function("text 1.0: read all", |b| { | ||
b.iter(|| { | ||
let mut reader = | ||
LazyApplicationReader::<'_, TextEncoding_1_1>::new(data_1_0.as_bytes()).unwrap(); | ||
let mut num_values = 0usize; | ||
while let Some(item) = reader.next().unwrap() { | ||
num_values += count_value_and_children(&item).unwrap(); | ||
} | ||
let _ = black_box(num_values); | ||
}) | ||
}); | ||
c.bench_function("text 1.1: scan all", |b| { | ||
b.iter(|| { | ||
let mut reader = LazyTextReader_1_1::new(data_1_1.as_bytes()).unwrap(); | ||
reader.register_template(template_text).unwrap(); | ||
while let Some(item) = reader.next().unwrap() { | ||
black_box(item); | ||
} | ||
}) | ||
}); | ||
c.bench_function("text 1.1: read all", |b| { | ||
b.iter(|| { | ||
let mut reader = LazyTextReader_1_1::new(data_1_1.as_bytes()).unwrap(); | ||
reader.register_template(template_text).unwrap(); | ||
let mut num_values = 0usize; | ||
while let Some(item) = reader.next().unwrap() { | ||
num_values += count_value_and_children(&item).unwrap(); | ||
} | ||
let _ = black_box(num_values); | ||
}) | ||
}); | ||
} | ||
|
||
criterion_group!(benches, criterion_benchmark); | ||
criterion_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.