-
Notifications
You must be signed in to change notification settings - Fork 36
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Overhaul the text parsers, port from nom
to winnow
#892
base: main
Are you sure you want to change the base?
Changes from 27 commits
fd4e853
158c3ae
0e53e48
4bd0ae4
7ea8b12
092032e
32d215f
4a945c8
b2d65fd
33a7ef9
4dab692
58ce9cd
3205514
84a0a26
41dabd0
1d1def2
7cd83c8
8201e04
5b1b84e
aa27f61
6016bf3
46842da
0b9aa80
fba64e9
ee4c5a7
c8a414d
9a6473d
a9583c5
2ee8877
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,9 +47,8 @@ fn maximally_compact_1_1_data(num_values: usize) -> TestData_1_1 { | |
|
||
let text_1_1_data = r#"(:event 1670446800245 418 "6" "1" "abc123" (:: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values); | ||
|
||
let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🪧 This benchmark is really showing its age. When it was written, there was no support for reading encoding directives, so the tests/benchmarks manually compiled and registered their own templates. Now that the readers manage their encoding context as expected, reading a leading IVM clears the manually registered templates. When our managed writer API is fleshed out, we'll have a way to hand a macro to the writer so it gets serialized in the data stream. For now, we simply skip the IVM in binary 1.1. |
||
#[rustfmt::skip] | ||
let mut binary_1_1_data_body: Vec<u8> = [MacroTable::FIRST_USER_MACRO_ID as u8, // Macro ID | ||
let binary_1_1_data: Vec<u8> = [MacroTable::FIRST_USER_MACRO_ID as u8, // Macro ID | ||
0b10, // [NOTE: `0b`] `parameters*` arg is an arg group | ||
0x66, // 6-byte integer (`timestamp` param) | ||
0x75, 0x5D, 0x63, 0xEE, 0x84, 0x01, | ||
|
@@ -73,7 +72,6 @@ fn maximally_compact_1_1_data(num_values: usize) -> TestData_1_1 { | |
0x39, 0x3A, 0x35, 0x39, | ||
0x2E, 0x37, 0x34, 0x34, | ||
0x30, 0x30, 0x30, 0x5A].repeat(num_values); | ||
binary_1_1_data.append(&mut binary_1_1_data_body); | ||
TestData_1_1 { | ||
name: "maximally compact".to_owned(), | ||
template_definition_text, | ||
|
@@ -107,9 +105,8 @@ fn moderately_compact_1_1_data(num_values: usize) -> TestData_1_1 { | |
"#; | ||
|
||
let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (:: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values); | ||
let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM | ||
#[rustfmt::skip] | ||
let mut binary_1_1_data_body: Vec<u8> = [MacroTable::FIRST_USER_MACRO_ID as u8, // Macro ID | ||
let binary_1_1_data: Vec<u8> = [MacroTable::FIRST_USER_MACRO_ID as u8, // Macro ID | ||
0b10, // [NOTE: `0b` prefix] `parameters*` arg is an arg group | ||
0x66, // 6-byte integer (`timestamp` param) | ||
0x75, 0x5D, 0x63, 0xEE, 0x84, 0x01, | ||
|
@@ -142,7 +139,6 @@ fn moderately_compact_1_1_data(num_values: usize) -> TestData_1_1 { | |
0x2E, 0x37, 0x34, 0x34, | ||
0x30, 0x30, 0x30, 0x5A].repeat(num_values); | ||
|
||
binary_1_1_data.append(&mut binary_1_1_data_body); | ||
TestData_1_1 { | ||
name: "moderately compact".to_owned(), | ||
template_definition_text: template_definition_text.to_owned(), | ||
|
@@ -176,9 +172,8 @@ fn length_prefixed_moderately_compact_1_1_data(num_values: usize) -> TestData_1_ | |
"#; | ||
|
||
let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (:: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values); | ||
let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM | ||
#[rustfmt::skip] | ||
let mut binary_1_1_data_body: Vec<u8> = [0xF5, // LP invocation | ||
let binary_1_1_data: Vec<u8> = [0xF5, // LP invocation | ||
((MacroTable::FIRST_USER_MACRO_ID * 2) + 1) as u8, // Macro ID | ||
0xDF, // Length prefix: FlexUInt 111 | ||
0b10, // [NOTE: `0b` prefix] `parameters*` arg is an arg group | ||
|
@@ -213,7 +208,6 @@ fn length_prefixed_moderately_compact_1_1_data(num_values: usize) -> TestData_1_ | |
0x2E, 0x37, 0x34, 0x34, | ||
0x30, 0x30, 0x30, 0x5A].repeat(num_values); | ||
|
||
binary_1_1_data.append(&mut binary_1_1_data_body); | ||
TestData_1_1 { | ||
name: "moderately compact w/length-prefixed top level".to_owned(), | ||
template_definition_text: template_definition_text.to_owned(), | ||
|
@@ -444,12 +438,12 @@ mod benchmark { | |
b.iter(|| { | ||
// We don't have an API for doing this with the application-level reader yet, so | ||
// for now we use a manually configured context and a raw reader. | ||
let mut reader = LazyRawBinaryReader_1_1::new(binary_1_1_data); | ||
let mut reader = LazyRawBinaryReader_1_1::new(context_ref, binary_1_1_data); | ||
let mut num_top_level_values: usize = 0; | ||
// Skip past the IVM | ||
reader.next(context_ref).unwrap().expect_ivm().unwrap(); | ||
reader.next().unwrap().expect_ivm().unwrap(); | ||
Comment on lines
-447
to
+444
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🪧 The raw readers now take a reference to the encoding context at construction time instead of having it be passed into each call to `next()`. Taking them as an argument to `next()` […] |
||
// Expect every top-level item to be an e-expression. | ||
while let RawStreamItem::EExp(raw_eexp) = reader.next(context_ref).unwrap() { | ||
while let RawStreamItem::EExp(raw_eexp) = reader.next().unwrap() { | ||
num_top_level_values += 1; | ||
// Look up the e-expression's invoked macro ID in the encoding context. | ||
let eexp = raw_eexp.resolve(context_ref).unwrap(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🪧 The `simd` feature enables the `memchr` operation when scanning for an expected token.