Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove unnecessary unsafe functions #998

Merged
merged 1 commit into from
Mar 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pest/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ impl<R: RuleType> Error<R> {
};
let error = Error::new_from_pos(
ErrorVariant::CustomError { message },
Position::new(input, error_position).unwrap(),
Position::new_internal(input, error_position),
);
Some(error)
}
Expand Down
36 changes: 13 additions & 23 deletions pest/src/iterators/flat_pairs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,14 @@ use crate::RuleType;
/// [`Pair`]: struct.Pair.html
/// [`Pairs::flatten`]: struct.Pairs.html#method.flatten
pub struct FlatPairs<'i, R> {
/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
start: usize,
end: usize,
line_index: Rc<LineIndex>,
}

/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
pub unsafe fn new<'i, R: RuleType>(
pub fn new<'i, R: RuleType>(
queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
start: usize,
Expand Down Expand Up @@ -117,14 +111,12 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {
return None;
}

let pair = unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
Rc::clone(&self.line_index),
self.start,
)
};
let pair = pair::new(
Rc::clone(&self.queue),
self.input,
Rc::clone(&self.line_index),
self.start,
);
self.next_start();

Some(pair)
Expand All @@ -144,14 +136,12 @@ impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> {

self.next_start_from_end();

let pair = unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
Rc::clone(&self.line_index),
self.end,
)
};
let pair = pair::new(
Rc::clone(&self.queue),
self.input,
Rc::clone(&self.line_index),
self.end,
);

Some(pair)
}
Expand Down
11 changes: 2 additions & 9 deletions pest/src/iterators/pair.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,14 @@ use crate::RuleType;
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<'i, R> {
/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
/// Token index into `queue`.
start: usize,
line_index: Rc<LineIndex>,
}

/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
pub unsafe fn new<'i, R: RuleType>(
pub fn new<'i, R: RuleType>(
queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
line_index: Rc<LineIndex>,
Expand Down Expand Up @@ -210,8 +204,7 @@ impl<'i, R: RuleType> Pair<'i, R> {
let start = self.pos(self.start);
let end = self.pos(self.pair());

// Generated positions always come from Positions and are UTF-8 borders.
unsafe { span::Span::new_unchecked(self.input, start, end) }
span::Span::new_internal(self.input, start, end)
}

/// Get current node tag
Expand Down
30 changes: 13 additions & 17 deletions pest/src/iterators/pairs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ impl<'i, R: RuleType> Pairs<'i, R> {
/// ```
#[inline]
pub fn flatten(self) -> FlatPairs<'i, R> {
unsafe { flat_pairs::new(self.queue, self.input, self.start, self.end) }
flat_pairs::new(self.queue, self.input, self.start, self.end)
}

/// Finds the first pair that has its node or branch tagged with the provided
Expand Down Expand Up @@ -347,14 +347,12 @@ impl<'i, R: RuleType> Pairs<'i, R> {
#[inline]
pub fn peek(&self) -> Option<Pair<'i, R>> {
if self.start < self.end {
Some(unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
Rc::clone(&self.line_index),
self.start,
)
})
Some(pair::new(
Rc::clone(&self.queue),
self.input,
Rc::clone(&self.line_index),
self.start,
))
} else {
None
}
Expand Down Expand Up @@ -427,14 +425,12 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
self.end = self.pair_from_end();
self.pairs_count -= 1;

let pair = unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
Rc::clone(&self.line_index),
self.end,
)
};
let pair = pair::new(
Rc::clone(&self.queue),
self.input,
Rc::clone(&self.line_index),
self.end,
);

Some(pair)
}
Expand Down
20 changes: 6 additions & 14 deletions pest/src/iterators/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,12 @@ use crate::RuleType;
/// [`Pairs::tokens`]: struct.Pairs.html#method.tokens
#[derive(Clone)]
pub struct Tokens<'i, R> {
/// # Safety:
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
start: usize,
end: usize,
}

// TODO(safety): QueueableTokens must be valid indices into input.
pub fn new<'i, R: RuleType>(
queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
Expand All @@ -46,7 +42,7 @@ pub fn new<'i, R: RuleType>(
QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
assert!(
input.get(input_pos..).is_some(),
"💥 UNSAFE `Tokens` CREATED 💥"
"💥 INVALID `Tokens` CREATED 💥"
)
}
}
Expand Down Expand Up @@ -75,19 +71,15 @@ impl<'i, R: RuleType> Tokens<'i, R> {

Token::Start {
rule,
// QueueableTokens are safely created.
pos: unsafe { position::Position::new_unchecked(self.input, input_pos) },
pos: position::Position::new_internal(self.input, input_pos),
}
}
QueueableToken::End {
rule, input_pos, ..
} => {
Token::End {
rule,
// QueueableTokens are safely created.
pos: unsafe { position::Position::new_unchecked(self.input, input_pos) },
}
}
} => Token::End {
rule,
pos: position::Position::new_internal(self.input, input_pos),
},
}
}
}
Expand Down
3 changes: 1 addition & 2 deletions pest/src/parser_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,8 +463,7 @@ where

Err(Error::new_from_pos_with_parsing_attempts(
variant,
// TODO(performance): Guarantee state.attempt_pos is a valid position
Position::new(input, state.attempt_pos).unwrap(),
Position::new_internal(input, state.attempt_pos),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding unit tests specifically targeting the error handling behavior of Position::new_internal to ensure its functionality aligns with expectations and to catch any potential edge cases.

Would you like me to help by drafting some unit tests for this change?

state.parse_attempts.clone(),
))
}
Expand Down
12 changes: 2 additions & 10 deletions pest/src/position.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,12 @@ use crate::span;
#[derive(Clone, Copy)]
pub struct Position<'i> {
input: &'i str,
/// # Safety:
///
/// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus).
pos: usize,
}

impl<'i> Position<'i> {
/// Create a new `Position` without checking invariants. (Checked with `debug_assertions`.)
///
/// # Safety:
///
/// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think some of these comments may still be valid that the caller is responsible for providing that valid pos or start..end indices, because the access is sometimes done directly via self.input[pos] instead of self.input.get(pos)

pub(crate) unsafe fn new_unchecked(input: &str, pos: usize) -> Position<'_> {
pub(crate) fn new_internal(input: &str, pos: usize) -> Position<'_> {
debug_assert!(input.get(pos..).is_some());
Position { input, pos }
}
Expand Down Expand Up @@ -106,8 +99,7 @@ impl<'i> Position<'i> {
if ptr::eq(self.input, other.input)
/* && self.input.get(self.pos..other.pos).is_some() */
{
// This is safe because the pos field of a Position should always be a valid str index.
unsafe { span::Span::new_unchecked(self.input, self.pos, other.pos) }
span::Span::new_internal(self.input, self.pos, other.pos)
} else {
// TODO: maybe a panic if self.pos < other.pos
panic!("span created from positions from different inputs")
Expand Down
23 changes: 5 additions & 18 deletions pest/src/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,13 @@ use crate::position;
#[derive(Clone, Copy)]
pub struct Span<'i> {
input: &'i str,
/// # Safety
///
/// Must be a valid character boundary index into `input`.
start: usize,
/// # Safety
///
/// Must be a valid character boundary index into `input`.
end: usize,
}

impl<'i> Span<'i> {
/// Create a new `Span` without checking invariants. (Checked with `debug_assertions`.)
///
/// # Safety
///
/// `input[start..end]` must be a valid subslice; that is, said indexing should not panic.
pub(crate) unsafe fn new_unchecked(input: &str, start: usize, end: usize) -> Span<'_> {
pub(crate) fn new_internal(input: &str, start: usize, end: usize) -> Span<'_> {
debug_assert!(input.get(start..end).is_some());
Span { input, start, end }
}
Expand Down Expand Up @@ -144,8 +134,7 @@ impl<'i> Span<'i> {
/// ```
#[inline]
pub fn start_pos(&self) -> position::Position<'i> {
// Span's start position is always a UTF-8 border.
unsafe { position::Position::new_unchecked(self.input, self.start) }
position::Position::new_internal(self.input, self.start)
}

/// Returns the `Span`'s end `Position`.
Expand All @@ -163,8 +152,7 @@ impl<'i> Span<'i> {
/// ```
#[inline]
pub fn end_pos(&self) -> position::Position<'i> {
// Span's end position is always a UTF-8 border.
unsafe { position::Position::new_unchecked(self.input, self.end) }
position::Position::new_internal(self.input, self.end)
}

/// Splits the `Span` into a pair of `Position`s.
Expand All @@ -182,9 +170,8 @@ impl<'i> Span<'i> {
/// ```
#[inline]
pub fn split(self) -> (position::Position<'i>, position::Position<'i>) {
// Span's start and end positions are always a UTF-8 borders.
let pos1 = unsafe { position::Position::new_unchecked(self.input, self.start) };
let pos2 = unsafe { position::Position::new_unchecked(self.input, self.end) };
let pos1 = position::Position::new_internal(self.input, self.start);
let pos2 = position::Position::new_internal(self.input, self.end);

(pos1, pos2)
}
Expand Down
Loading