Skip to content

Commit

Permalink
Benches on typed nodes (#15)
Browse files Browse the repository at this point in the history
* Add benchmarks for types in `pest3::typed`.

* Benchmarking both parsing and checking.

* CI: use my project name.

Fix this when ready to open a PR.

* Fix attribute position.

In `pest/benches/position.rs`.

* Update some attributes and comments.

* Try to optimize `pest3::typed::Char`.

* Try to optimize `pest3::typed::Char` again.

Encode the character into a buffer of 4 bytes to avoid dynamic allocation.

* Simplify the implementation of `pest3::typed::ANY`.

* Try to use constant evaluation.

* Revert "Try to use constant evaluation."

This reverts commit 47c2a0e.

---------

Co-authored-by: TheVeryDarkness <[email protected]>
  • Loading branch information
tomtau and TheVeryDarkness authored Jul 22, 2024
1 parent 4af099c commit 1bb36b1
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
- name: Track base branch benchmarks with Bencher
run: |
bencher run \
--project pest3-zhacqvrk \
--project pest3 \
--token '${{ secrets.BENCHER_API_TOKEN }}' \
--branch ${{ github.ref_name }} \
--testbed ubuntu-latest \
Expand Down
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ pest3_meta = { path = "./meta", package = "pest3_meta" }
pest3_generator = { path = "./generator", package = "pest3_generator" }
unicode-width = { version = "0.1.13" }
pest2 = { version = "2.7.8", package = "pest" }
pest2_derive = { version = "2.7.8", package = "pest_derive" }
pest2_derive = { version = "2.7.8", package = "pest_derive" }
criterion = "0.5.1"
2 changes: 1 addition & 1 deletion derive/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ pest3_generator = { path = "../generator", version = "= 0.0.0-prealpha0" }
[dev-dependencies]
pest3 = { path = "../pest", version = "= 0.0.0-prealpha0" }
anyhow = "1"
criterion = "0.5.1"
serde_json = "1.0.117"
criterion.workspace = true

[[bench]]
name = "json"
Expand Down
7 changes: 7 additions & 0 deletions pest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,10 @@ license.workspace = true
[dependencies]
unicode-width.workspace = true
pest2.workspace = true

[dev-dependencies]
criterion.workspace = true

[[bench]]
name = "position"
harness = false
167 changes: 167 additions & 0 deletions pest/benches/position.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
//! This benchmark is designed to test the performance of the `TypedNode` trait.
#![allow(non_camel_case_types)]

use std::time::Duration;

use criterion::{criterion_group, criterion_main, Criterion};
use pest3::{
choice::{Choice16, Choice2},
sequence::{Sequence16, Sequence2},
typed::{
template::{
Char, CharRange, Empty, Insens, RepMinMax, SkipChar, Str, ANY, PEEK, PEEK_ALL, PUSH,
},
wrapper, RuleType, TypedNode,
},
};

/// Rule type used to instantiate `TypedNode<'_, Rule>` in this benchmark.
///
/// It has a single variant, `EOI`, which is the one the `RuleType` impl for this
/// enum has to supply.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
enum Rule {
EOI,
}

// Supplies the `EOI` constant of `RuleType` with this enum's only variant.
impl RuleType for Rule {
const EOI: Self = Self::EOI;
}

/// Number of times `FRAG` is repeated to build the benchmark input.
const TIMES: usize = 0x10000usize;
/// Fragment the benchmark input is built from: the sixteen lowercase hexadecimal digits.
const FRAG: &str = "0123456789abcdef";
/// Length of `FRAG`, derived from the literal so the two cannot drift apart.
///
/// `str::len` counts bytes; `FRAG` is pure ASCII, so this is also its character count.
const LEN: usize = FRAG.len();
/// Length of the whole benchmark input (`FRAG` repeated `TIMES` times).
const TOTAL: usize = TIMES * LEN;

/// Marker type whose `wrapper::String::CONTENT` is `FRAG`.
///
/// Used in `benchmark` as the parameter of `Str<S>` and `PUSH<Str<S>>`.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct S;
impl wrapper::String for S {
const CONTENT: &'static str = FRAG;
}

/// Marker type whose `wrapper::String::CONTENT` is the upper-case spelling of `FRAG`.
///
/// Used in `benchmark` as the parameter of `Insens<'i, I>`; the input itself is lower-case.
// NOTE(review): presumably the case mismatch is deliberate so the insensitive match does real work — confirm.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct I;
impl wrapper::String for I {
const CONTENT: &'static str = "0123456789ABCDEF";
}

/// Registers a `parse` and a `check` benchmark for every node type declared in `types`.
///
/// All benchmarks run against the same input: `FRAG` repeated `TIMES` times.
fn benchmark(b: &mut Criterion) {
let input = FRAG.repeat(TIMES);

// Node types under test. Wherever `RepMinMax` is used, its two bounds are equal:
// `TOTAL` when the repeated node is a single-character template, `TIMES` (or `TIMES - 1`)
// when the repeated node covers a whole `FRAG`.
// NOTE(review): presumably this makes every type consume the entire input — confirm
// against the semantics of `RepMinMax` and `Empty` in `pest3::typed::template`.
mod types {
use super::*;
// `ANY`, repeated once per character of the input.
pub type any = RepMinMax<ANY, Empty, TOTAL, TOTAL>;
// A 16-way choice between the individual characters of `FRAG`, repeated once per character.
pub type choices_16 = RepMinMax<
Choice16<
Char<'0'>,
Char<'1'>,
Char<'2'>,
Char<'3'>,
Char<'4'>,
Char<'5'>,
Char<'6'>,
Char<'7'>,
Char<'8'>,
Char<'9'>,
Char<'a'>,
Char<'b'>,
Char<'c'>,
Char<'d'>,
Char<'e'>,
Char<'f'>,
>,
Empty,
TOTAL,
TOTAL,
>;
// The characters of `FRAG` in order as one 16-element sequence, repeated once per fragment.
pub type sequence_16 = RepMinMax<
Sequence16<
Char<'0'>,
Empty,
Char<'1'>,
Empty,
Char<'2'>,
Empty,
Char<'3'>,
Empty,
Char<'4'>,
Empty,
Char<'5'>,
Empty,
Char<'6'>,
Empty,
Char<'7'>,
Empty,
Char<'8'>,
Empty,
Char<'9'>,
Empty,
Char<'a'>,
Empty,
Char<'b'>,
Empty,
Char<'c'>,
Empty,
Char<'d'>,
Empty,
Char<'e'>,
Empty,
Char<'f'>,
Empty,
>,
Empty,
TIMES,
TIMES,
>;
// Whole-fragment string match (`S::CONTENT` is `FRAG`), repeated once per fragment.
pub type strings = RepMinMax<Str<S>, Empty, TIMES, TIMES>;
// Same, but through `Insens` with the upper-case content of `I`.
pub type insensitive_strings<'i> = RepMinMax<Insens<'i, I>, Empty, TIMES, TIMES>;
// A single character range spanning '0'..'f', repeated once per character.
pub type range = RepMinMax<CharRange<'0', 'f'>, Empty, TOTAL, TOTAL>;
// Two character ranges ('0'..'9' and 'a'..'f') behind a 2-way choice, repeated once per character.
pub type range2 =
RepMinMax<Choice2<CharRange<'0', '9'>, CharRange<'a', 'f'>>, Empty, TOTAL, TOTAL>;
// `SkipChar` sized to one fragment and repeated, versus one `SkipChar` sized to the whole input.
pub type skip_fragments<'i> = RepMinMax<SkipChar<'i, LEN>, Empty, TIMES, TIMES>;
pub type skip_all<'i> = SkipChar<'i, TOTAL>;
// `PUSH` around a whole-fragment string match, repeated once per fragment.
pub type push = RepMinMax<PUSH<Str<S>>, Empty, TIMES, TIMES>;
// One `PUSH` of the first fragment followed by `TIMES - 1` repetitions of `PEEK`.
// NOTE(review): presumably `PEEK` re-matches what `PUSH` put on the stack — confirm.
pub type push_peek<'i> = Sequence2<
PUSH<Str<S>>,
Empty,
RepMinMax<PEEK<'i>, Empty, { TIMES - 1 }, { TIMES - 1 }>,
Empty,
>;
// Same shape as `push_peek`, with `PEEK_ALL` in place of `PEEK`.
pub type push_peek_all<'i> = Sequence2<
PUSH<Str<S>>,
Empty,
RepMinMax<PEEK_ALL<'i>, Empty, { TIMES - 1 }, { TIMES - 1 }>,
Empty,
>;
}
// For each `$name`, chains two `bench_function` calls onto `b`: one timing
// `TypedNode::try_parse` and one timing `TypedNode::check` for `types::$name`.
// The benchmark labels come from `stringify!(parse - $name)` / `stringify!(check - $name)`.
// Both results are unwrapped, so a type that fails on `input` panics rather than being measured.
macro_rules! test_series {
($($name:ident),*) => {
b
$(
.bench_function(stringify!(parse - $name), |b| b.iter(|| {
<types::$name as TypedNode<'_, Rule>>::try_parse(&input).unwrap()
}))
.bench_function(stringify!(check - $name), |b| b.iter(|| {
<types::$name as TypedNode<'_, Rule>>::check(&input).unwrap()
}))
)*
};
}
// Every alias declared in `types` is listed here.
test_series!(
any,
choices_16,
sequence_16,
range,
range2,
strings,
insensitive_strings,
skip_fragments,
skip_all,
push,
push_peek,
push_peek_all
);
}

// Benchmark group `benchmarks`: runs `benchmark` with the default Criterion
// configuration, except that the measurement time is set to 10 seconds.
criterion_group!(
name = benchmarks;
config = Criterion::default().measurement_time(Duration::from_secs(10));
targets = benchmark
);
// Entry point for the `benchmarks` group (this bench is declared with `harness = false`).
criterion_main!(benchmarks);
15 changes: 12 additions & 3 deletions pest/src/position.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
//!
//! [pest-typed/position.rs]: https://github.com/TheVeryDarkness/pest-typed/blob/0.12.1/main/src/position.rs
#![allow(dead_code)]

use core::cmp::Ordering;
use core::fmt::{self, Write};
use core::hash::{Hash, Hasher};
Expand Down Expand Up @@ -308,6 +306,7 @@ impl<'i> Position<'i> {
/// Skips until one of the given `strings` is found. If none of the `strings` can be found,
/// this function will return `false` but its `pos` will *still* be updated.
#[inline]
#[allow(dead_code)]
pub(crate) fn skip_until(&mut self, strings: &[&str]) -> bool {
#[cfg(not(feature = "memchr"))]
{
Expand Down Expand Up @@ -361,6 +360,7 @@ impl<'i> Position<'i> {
}

#[inline]
#[allow(dead_code)]
fn skip_until_basic(&mut self, strings: &[&str]) -> bool {
// TODO: optimize with Aho-Corasick, e.g. https://crates.io/crates/daachorse?
for from in self.pos..self.input.len() {
Expand All @@ -383,6 +383,16 @@ impl<'i> Position<'i> {
false
}

/// Consumes and returns the char located at the current `pos`.
///
/// On success `pos` is advanced past that char (by its UTF-8 encoded length).
/// When no char is left, `None` is returned and `pos` stays where it was.
#[inline]
#[allow(dead_code)]
pub(crate) fn next_char(&mut self) -> Option<char> {
    // Bail out early at the end of the input; `pos` is deliberately left untouched.
    let current = self.input[self.pos..].chars().next()?;
    self.pos += current.len_utf8();
    Some(current)
}

/// Matches the char at the `Position` against a specified character and returns `true` if a match
/// was made. If no match was made, returns `false`.
/// `pos` will not be updated in either case.
Expand All @@ -395,7 +405,6 @@ impl<'i> Position<'i> {
/// Matches the char at the `Position` against a filter function and returns `true` if a match
/// was made. If no match was made, returns `false` and `pos` will not be updated.
#[inline]
#[allow(dead_code)]
pub(crate) fn match_char_by<F>(&mut self, f: F) -> bool
where
F: FnOnce(char) -> bool,
Expand Down
16 changes: 7 additions & 9 deletions pest/src/typed/template/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ impl<'i, R: RuleType, const CHAR: char> TypedNode<'i, R> for Char<CHAR> {
_stack: &mut Stack<Span<'i>>,
_tracker: &mut Tracker<'i, R>,
) -> Option<(Position<'i>, Self)> {
match input.match_char_by(|c| c == CHAR) {
let mut buf = [0u8; 4];
match input.match_string(CHAR.encode_utf8(&mut buf)) {
true => Some((input, Self)),
false => None,
}
Expand All @@ -245,7 +246,8 @@ impl<'i, R: RuleType, const CHAR: char> TypedNode<'i, R> for Char<CHAR> {
_stack: &mut Stack<Span<'i>>,
_tracker: &mut Tracker<'i, R>,
) -> Option<Position<'i>> {
match input.match_char_by(|c| c == CHAR) {
let mut buf = [0u8; 4];
match input.match_string(CHAR.encode_utf8(&mut buf)) {
true => Some(input),
false => None,
}
Expand Down Expand Up @@ -469,13 +471,9 @@ impl<'i, R: RuleType> TypedNode<'i, R> for ANY {
_stack: &mut Stack<Span<'i>>,
_tracker: &mut Tracker<'i, R>,
) -> Option<(Position<'i>, Self)> {
let mut c: char = ' ';
match input.match_char_by(|ch| {
c = ch;
true
}) {
true => Some((input, Self { content: c })),
false => None,
match input.next_char() {
Some(c) => Some((input, Self { content: c })),
None => None,
}
}
#[inline]
Expand Down

0 comments on commit 1bb36b1

Please sign in to comment.