From 5c9db5ae1aa330e21acb69c20f060a9b900c8886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nadir=20Fejzi=C4=87?= Date: Mon, 23 Oct 2023 12:08:38 +0200 Subject: [PATCH] fix: avoid collecting iterator during token resolving (#112) * fix: avoid collecting iterator in `TokenResolver` * fix: add test cases for bold ambiguous start and end * fix: remove unnecessary checks on symbol flattening * fix: remove redundant check for interruption of tokens * fix: use global substitutor to prevent repeated allocs * fix: remove dead code * fix: optimize resolving interrupted tokens as plain * fix: use `fxhash` for faster hashing in substitutor * fix: re-enable optimized debug assertions in Symbol::flatten * fix: use alias for TokenMap keys * chore: remove unused dependency * fix: don't clone iterator if there's no need to --- Cargo.toml | 1 + commons/src/scanner/symbol/mod.rs | 42 +-- inline/Cargo.toml | 2 + inline/src/inlines/substitute.rs | 23 +- inline/src/lexer/mod.rs | 6 +- inline/src/lexer/resolver/mod.rs | 307 +++++++++++------- inline/src/lexer/resolver/raw_token.rs | 63 +++- inline/src/lexer/token.rs | 12 +- inline/tests/spec/markup/bold.yml | 20 ++ .../snapshots/lexer/bold/ambiguous-end.snap | 38 +++ .../snapshots/lexer/bold/ambiguous-start.snap | 38 +++ .../snapshots/parser/bold/ambiguous-end.snap | 25 ++ .../parser/bold/ambiguous-start.snap | 25 ++ 13 files changed, 443 insertions(+), 159 deletions(-) create mode 100644 inline/tests/spec/snapshots/lexer/bold/ambiguous-end.snap create mode 100644 inline/tests/spec/snapshots/lexer/bold/ambiguous-start.snap create mode 100644 inline/tests/spec/snapshots/parser/bold/ambiguous-end.snap create mode 100644 inline/tests/spec/snapshots/parser/bold/ambiguous-start.snap diff --git a/Cargo.toml b/Cargo.toml index e2c07da3..4a8703c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,3 +35,4 @@ clap = { version = "4.2.7", features = ["derive", "cargo", "env"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde_yaml = "0.8.23" +ribbon = "0.7.0" diff --git a/commons/src/scanner/symbol/mod.rs b/commons/src/scanner/symbol/mod.rs index 72ce8f30..040e3b18 100644 --- a/commons/src/scanner/symbol/mod.rs +++ b/commons/src/scanner/symbol/mod.rs @@ -151,31 +151,35 @@ impl Symbol<'_> { } /// Flattens the input of consecutive symbols. Returns the slice of input starting from start - /// position of first symbol until the end of last symbol. + /// position of first symbol until the end of last symbol. Returns [`None`] if slice is empty. + /// + /// # Panics + /// + /// It's assumed that all [`Symbol`]s in slice reference the same input. If not, the function + /// might panic (guaranteed in debug) if inputs are not the same and last [`Symbol`] in slice + /// references input that is longer than the one referenced in the first [`Symbol`]. + /// + /// # Examples /// - /// Returns `None` if the referenced input in the given symbols is not the same. + /// ``` + /// use unimarkup_commons::scanner::{scan_str, Symbol}; + /// + /// let input = "This is a string"; + /// let symbols: Vec<_> = scan_str(input); + /// + /// assert_eq!(input, Symbol::flatten(&symbols).unwrap()); + /// ``` pub fn flatten(symbols: &[Self]) -> Option<&str> { - debug_assert!(symbols - .windows(2) - .all(|window| window[0].input == window[1].input)); - - if symbols.is_empty() { - return Some(""); - } + let (first, last) = (symbols.first()?, symbols.last()?); - let first = symbols.first()?; - let last = symbols.last()?; + debug_assert_eq!(first.input, last.input); - if first.input == last.input { - let input = first.input; + let input = first.input; - let start = first.offset.start; - let end = last.offset.end; + let start = first.offset.start; + let end = last.offset.end; - Some(&input[start..end]) - } else { - None - } + Some(&input[start..end]) } /// Flattens the iterator of consecutive symbols. Returns the slice of input starting from start diff --git a/inline/Cargo.toml b/inline/Cargo.toml index c9f5aba9..c13bb293 100644 --- a/inline/Cargo.toml +++ b/inline/Cargo.toml @@ -18,7 +18,9 @@ harness=false [dependencies] logid.workspace = true +ribbon.workspace = true unimarkup-commons = { path = "../commons/", version = "0" } +fxhash = "0.2.1" [dev-dependencies] unimarkup-commons = { path ="../commons/", version = "0", features = ["test_runner"] } diff --git a/inline/src/inlines/substitute.rs b/inline/src/inlines/substitute.rs index 1fffef5c..9bc63cfd 100644 --- a/inline/src/inlines/substitute.rs +++ b/inline/src/inlines/substitute.rs @@ -1,5 +1,8 @@ -use std::collections::{HashMap, HashSet}; +//! Substitutor and constants that can be substituted in Unimarkup content. +use fxhash::{FxHashMap, FxHashSet}; + +use logid::evident::once_cell::sync::Lazy; use unimarkup_commons::scanner::{self, span::Span}; /// ASCII Emojis that can be replaced with their Unicode versions in a Unimarkup text. @@ -75,20 +78,22 @@ pub const ALIASES: [(&str, &str); 20] = [ #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct Substitutor<'a> { - direct: HashMap<&'a str, &'a str>, - aliased: HashMap<&'a str, &'a str>, + direct: FxHashMap<&'a str, &'a str>, + aliased: FxHashMap<&'a str, &'a str>, max_len: usize, - first_grapheme: HashSet<&'a str>, + first_grapheme: FxHashSet<&'a str>, } +static GLOBAL: Lazy = Lazy::new(Substitutor::create_global); + impl<'sub> Substitutor<'sub> { - pub(crate) fn new() -> Self { - let direct: HashMap<_, _> = EMOJIS.into_iter().chain(ARROWS).collect(); + fn create_global() -> Substitutor<'static> { + let direct: FxHashMap<_, _> = EMOJIS.into_iter().chain(ARROWS).collect(); let aliased = ALIASES.into_iter().collect(); let max_len = direct.keys().map(|key| key.len()).max().unwrap_or(0); let first_grapheme = direct.keys().map(|key| &key[0..1]).collect(); - Self { + Substitutor { direct, aliased, max_len, @@ -96,6 +101,10 @@ impl<'sub> Substitutor<'sub> { } } + pub fn global() -> &'static Substitutor<'static> { + &GLOBAL + } + pub(crate) fn get_subst(&self, slice: &'sub str, span: Span) -> Option> { let content = *self.direct.get(slice)?; diff --git a/inline/src/lexer/mod.rs b/inline/src/lexer/mod.rs index f9f18ea2..4ec235d8 100644 --- a/inline/src/lexer/mod.rs +++ b/inline/src/lexer/mod.rs @@ -168,7 +168,7 @@ impl<'token> Lexer<'token> { TokenIterator { symbols, index: 0, - substitutor: Substitutor::new(), + substitutor: Substitutor::global(), } } @@ -246,7 +246,7 @@ pub struct TokenIterator<'input> { /// [`Substitutor`] used for resolving inline substitutions. Right now, substitutor uses only /// built-in substitutions and has 'static lifetime per default, and can be shortened to any /// other lifetime. - substitutor: Substitutor<'input>, + substitutor: &'static Substitutor<'static>, } impl<'input> TokenIterator<'input> { @@ -558,7 +558,7 @@ impl<'input> Iterator for TokenIterator<'input> { // 3. next grapheme is not a keyword -> it is plain text match self.get_symbol(self.index) { - Some(symbol) if symbol.is_keyword() || symbol.is_start_of_subst(&self.substitutor) => { + Some(symbol) if symbol.is_keyword() || symbol.is_start_of_subst(self.substitutor) => { self.lex_keyword() } Some(symbol) if symbol.is_esc() => { diff --git a/inline/src/lexer/resolver/mod.rs b/inline/src/lexer/resolver/mod.rs index fd1c64d2..766681aa 100644 --- a/inline/src/lexer/resolver/mod.rs +++ b/inline/src/lexer/resolver/mod.rs @@ -1,6 +1,8 @@ +use ribbon::{Enroll, Ribbon, Tape}; + use std::{ - collections::{btree_map::Entry, BTreeMap}, - ops::Range, + collections::{btree_map, BTreeMap}, + iter::Map, vec, }; @@ -12,12 +14,13 @@ pub(crate) use raw_token::*; type Scope = usize; type Indices = Vec; +type TokenMapKey = (Scope, TokenKind); #[derive(Debug, Clone)] #[repr(transparent)] /// Internal data structure for storing [`Indices`] of [`TokenKind`]s in specific [`Scope`]. struct TokenMap { - map: BTreeMap<(TokenKind, Scope), Indices>, + map: BTreeMap, } impl TokenMap { @@ -41,17 +44,39 @@ impl TokenMap { .or_insert_with(|| vec![index]); } - fn entry(&mut self, kind: TokenKind, scope: Scope) -> Entry<(TokenKind, Scope), Indices> { - let key = (Self::general_key(kind), scope); + fn entry(&mut self, kind: TokenKind, scope: Scope) -> btree_map::Entry { + let key = Self::create_key(kind, scope); self.map.entry(key) } + fn entries(&mut self) -> btree_map::IterMut<'_, TokenMapKey, Indices> { + self.map.iter_mut() + } + fn get_mut(&mut self, kind: TokenKind, scope: Scope) -> Option<&mut Indices> { - let key = (Self::general_key(kind), scope); + let key = Self::create_key(kind, scope); self.map.get_mut(&key) } + + fn create_key(kind: TokenKind, scope: Scope) -> TokenMapKey { + (scope, Self::general_key(kind)) + } +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +struct IdxRef<'indices> { + idx: usize, + indices: &'indices mut Indices, } +impl IdxRef<'_> { + fn delete(self) { + self.indices.remove(self.idx); + } +} + +type TokenIter<'token> = Map, fn(crate::Token) -> RawToken>; + /// Resolver of [`RawToken`]s, finds pairs of open/close tokens and marks them as such. If no pairs /// are found, tokens are marked as plain. /// @@ -66,136 +91,203 @@ impl TokenMap { /// - Every time a token pair is matched, all non-resolved tokens between them are marked as plain #[derive(Debug, Clone)] pub(crate) struct TokenResolver<'token> { + /// Scopes are introduced by brackets (for example text groups). curr_scope: usize, - interrupted: Vec>, - pub(crate) tokens: Vec>, + + /// Tape that enables expanding *visible* context inside of the TokenIterator. + pub(crate) tape: Tape>, + + /// The index of the `Token` at head of `Tape`. That is, the number of the `Token` that will be + /// returned by the resolver, with 0 being the number of the first `Token`. + tape_idx: usize, + + /// Mapping of `TokenKind` to indices of `Token`s that are not yet resolved. + unresolved: TokenMap, } impl<'token> TokenResolver<'token> { pub(crate) fn new(iter: TokenIterator<'token>) -> Self { - let mut new = Self { - curr_scope: 0, - interrupted: Vec::default(), - tokens: iter.map(RawToken::new).collect(), - }; + let tape = iter.map(RawToken::new as _).tape(); - new.resolve(); - new + Self { + curr_scope: 0, + tape, + tape_idx: 0, + unresolved: TokenMap::new(), + } } - fn resolve(&mut self) { - // map found tokens to their index in tokens vector - let mut token_map: TokenMap = TokenMap::new(); - - for index in 0..self.tokens.len() { - // on open/close bracket simply increment/decrement scope - if self.tokens[index].token.kind.is_open_bracket() { - self.curr_scope += 1; - continue; - } else if self.tokens[index].token.kind.is_close_bracket() { - // scope < 0 is user input error - // TODO: report this as a warning or an error - self.curr_scope = self.curr_scope.saturating_sub(1); - continue; - } - - // try to resolve token - if let Some(begin_index) = self.resolve_token(&mut token_map, index) { - // open tokens from begin_index to index are interrupted - self.interrupted.push((begin_index + 1)..index); + fn next_token(&mut self) -> Option> { + // idea: + // + // * First token resolved -> Pop and return it + // * First token not resolved: + // - expand tape until matching closing token is found + // - try to resolve the first token + // - repeat until whole token is resolved (two resolving needed for compound token) + // - pop and return it + + let mut looped = false; + let mut expanded = false; + loop { + match self.tape.peek_front() { + Some(t) => { + if t.token.kind.is_parenthesis() { + if t.token.kind.is_open_bracket() { + self.curr_scope += 1; + } else { + // TODO: syntax error, report to user + let _ = self.curr_scope.saturating_sub(1); + } + + self.tape.pop_front(); + self.tape_idx += 1; + + continue; + } else if t.is_resolved() { + self.tape_idx += 1; + return self.tape.pop_front(); + } else if (t.token.closes(None) && !t.token.opens()) || !expanded { + // token not resolved, but it's either: + // * closing token with no tokens before it, and is not an opening token + // -> is plain token + // + // * or token not resolved, but there are no more tokens to check, + // -> it can't be resolved, so it's a plain token + return self.tape.pop_front(); + } + } + None => { + // no new tokens available, token at the head of tape cannot be resolved + if looped && !expanded { + return None; + } + } } - if !self.tokens[index].state { - let kind = self.tokens[index].token.kind; - // save positions of every unresolved token - token_map.update_or_insert(kind, index, self.curr_scope); - } + expanded = self.try_resolve(); + looped = true; } } - fn resolve_token(&mut self, token_map: &mut TokenMap, index: usize) -> Option { + fn try_resolve(&mut self) -> bool { // multiple cases for current - unresolved token relationship: // 1. current IS ambiguous, there is unresolved one that IS ambiguous (ambiguous, ambiguous) // 2. current IS ambiguous, there is unresolved one that IS NOT ambiguous (simple, ambiguous) // 3. current NOT ambiguous, there is unresolved one that IS NOT ambiguous: (simple, simple) // 4. current NOT ambiguous, there is unresolved one that IS ambiguous (ambiguous, simple) - if self.tokens[index].token.closes(None) { - if self.tokens[index].token.is_ambiguous() { - return self.resolve_compound_token(token_map, index); - } else { - return self.resolve_simple_token(token_map, index); + let expanded = self.tape.expand(); + + let resolved_idx = match self.tape.peek_back() { + None => return false, + + Some(end) => { + if !end.token.closes(None) { + // if it's not closing, it cannot resolve opened token + None + } else if end.token.is_ambiguous() { + self.resolve_compound_token() + } else { + self.resolve_simple_token() + } + } + }; + + match resolved_idx { + Some(idx) => { + let tail_idx = self.tape_idx + self.tape.len() - 1; + + for (_, indices) in self.unresolved.entries() { + indices.retain(|i| *i > idx && *i < tail_idx); + } + } + None => { + if let Some(end) = self.tape.peek_back() { + if end.token.opens() && !end.token.kind.is_parenthesis() { + self.unresolved.update_or_insert( + end.token.kind, + self.tape_idx + self.tape.len() - 1, + self.curr_scope, + ); + } + } } } - None + expanded } - fn resolve_simple_token(&mut self, token_map: &mut TokenMap, index: usize) -> Option { - let token_kind = self.tokens[index].token.kind; + fn resolve_simple_token(&mut self) -> Option { + let token_kind = self.tape.peek_back()?.token.kind; - let indices = token_map.get_mut(token_kind, self.curr_scope)?; - let (unr_token, i, token_index) = self.find_first_matching(indices, index)?; + let (unr_token, idx_ref, token_index) = self.find_first_matching(token_kind)?; if unr_token.token.is_ambiguous() { // opening token IS ambiguous (ambiguous, simple) unr_token.split_ambiguous(); - if unr_token.token.kind != token_kind { - unr_token.set_tail_state(Resolved::Open); - } else { - unr_token.set_head_state(Resolved::Open); + if unr_token.token.kind == token_kind { + // make sure resolved part is in tail unr_token.swap_parts(); } - self.tokens[index].set_head_state(Resolved::Close); + unr_token.set_tail_state(State::Open); + + self.tape.peek_back_mut()?.set_head_state(State::Close); + Some(token_index) } else { - // opening token IS NOT ambiguous, (simple, simple) case - unr_token.set_head_state(Resolved::Open); - self.tokens[index].set_head_state(Resolved::Close); + idx_ref.delete(); + + // opening token IS NOT ambiguous, only head available so mark it appropriately + unr_token.set_head_state(State::Open); + + let curr_token = self.tape.peek_back_mut()?; + + curr_token.set_head_state(State::Close); if let Some(RawToken { - state: Resolved::Close, + state: State::Close, .. - }) = self.tokens[index].tail.as_deref() + }) = curr_token.tail.as_deref() { - self.tokens[index].swap_parts(); + curr_token.swap_parts(); } - indices.remove(i); Some(token_index) } } - fn resolve_compound_token(&mut self, token_map: &mut TokenMap, index: usize) -> Option { - let token_kind = self.tokens[index].token.kind; - let indices = token_map.get_mut(token_kind, self.curr_scope)?; - let (unr_token, i, token_index) = self.find_first_matching(indices, index)?; + fn resolve_compound_token(&mut self) -> Option { + let token_kind = self.tape.peek_back()?.token.kind; + let (unr_token, idx_ref, token_index) = self.find_first_matching(token_kind)?; if unr_token.token.is_ambiguous() { + idx_ref.delete(); // there is unresolved one that IS ambiguous (ambiguous, ambiguous) unr_token.split_ambiguous(); let unr_kind = unr_token.token.kind; - unr_token.set_state(Resolved::Open); + unr_token.set_state(State::Open); + + let curr_token = self.tape.peek_back_mut()?; - self.tokens[index].split_ambiguous(); - self.tokens[index].set_state(Resolved::Close); + curr_token.split_ambiguous(); + curr_token.set_state(State::Close); // make sure the parts are symmetric - if self.tokens[index].token.kind == unr_kind { - self.tokens[index].swap_parts(); + if curr_token.token.kind == unr_kind { + curr_token.swap_parts(); } - indices.remove(i); return Some(token_index); } else { // there is unresolved one that IS NOT ambiguous (simple, ambiguous) let kind = unr_token.token.kind; - if let Some(token_index) = self.resolve_partial(indices, index, kind) { + if let Some(token_index) = self.resolve_partial(kind) { // try to resolve the remaining part - self.resolve_token(token_map, index); + self.try_resolve(); return Some(token_index); } } @@ -203,50 +295,49 @@ impl<'token> TokenResolver<'token> { None } - fn resolve_partial( - &mut self, - indices: &mut Indices, - index: usize, - kind: TokenKind, - ) -> Option { - if let Some((unr_token, i, token_index)) = self.find_first_matching(indices, index) { - unr_token.set_head_state(Resolved::Open); + fn resolve_partial(&mut self, kind: TokenKind) -> Option { + let (unr_token, idx_ref, token_index) = self.find_first_matching(kind)?; - let curr_token = &mut self.tokens[index]; + idx_ref.delete(); - curr_token.split_ambiguous(); + unr_token.set_head_state(State::Open); - if curr_token.token.kind != kind { - curr_token.set_tail_state(Resolved::Close); - } else { - curr_token.set_head_state(Resolved::Close); - curr_token.swap_parts(); - } + let curr_token = self.tape.peek_back_mut()?; - indices.remove(i); + curr_token.split_ambiguous(); - Some(token_index) + if curr_token.token.kind != kind { + curr_token.set_tail_state(State::Close); } else { - None + curr_token.set_head_state(State::Close); + curr_token.swap_parts(); } + + Some(token_index) } fn find_first_matching( &mut self, - indices: &Indices, - curr_idx: usize, - ) -> Option<(&mut RawToken<'token>, usize, usize)> { + token_kind: TokenKind, + ) -> Option<(&mut RawToken<'token>, IdxRef, usize)> { + let indices = self.unresolved.get_mut(token_kind, self.curr_scope)?; + // find first unresolved token for (i, idx) in indices.iter().enumerate() { - let curr_token = &self.tokens[curr_idx]; - let token = &self.tokens[*idx]; + if *idx < self.tape_idx { + // token already resolved + indices.remove(i); + return None; + } - if !token.state && token.token.overlaps(&curr_token.token) && token.token.opens() { - if self.interrupted.iter().any(|range| range.contains(idx)) { - return None; - } + let idx = *idx - self.tape_idx; // offset the index as tape progressed - return Some((&mut self.tokens[*idx], i, *idx)); + let curr_token = &self.tape.peek_back()?; + let token = self.tape.peek_at(idx)?; + + if !token.state && token.token.overlaps(&curr_token.token) && token.token.opens() { + let idx_ref = IdxRef { idx: i, indices }; + return Some((self.tape.peek_at_mut(idx)?, idx_ref, idx + self.tape_idx)); } } @@ -254,21 +345,19 @@ impl<'token> TokenResolver<'token> { } pub(crate) fn into_iter(self) -> IntoIter<'token> { - IntoIter { - iter: self.tokens.into_iter(), - } + IntoIter { inner: self } } } #[derive(Debug, Clone)] pub(crate) struct IntoIter<'token> { - iter: vec::IntoIter>, + inner: TokenResolver<'token>, } impl<'token> Iterator for IntoIter<'token> { type Item = RawToken<'token>; fn next(&mut self) -> Option { - self.iter.next() + self.inner.next_token() } } diff --git a/inline/src/lexer/resolver/raw_token.rs b/inline/src/lexer/resolver/raw_token.rs index 25c6491c..a307ba79 100644 --- a/inline/src/lexer/resolver/raw_token.rs +++ b/inline/src/lexer/resolver/raw_token.rs @@ -4,32 +4,56 @@ use crate::{Spacing, SpanExt, Token, TokenKind}; // Token can either be opening one, closing one, or neither #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub(crate) enum Resolved { +pub(crate) enum State { Open, Close, - Neither, + Unresolved, + Plain, } -impl Not for Resolved { +impl From for State { + fn from(value: TokenKind) -> Self { + match value { + TokenKind::OpenParens + | TokenKind::CloseParens + | TokenKind::OpenBracket + | TokenKind::CloseBracket + | TokenKind::OpenBrace + | TokenKind::CloseBrace + | TokenKind::Substitution + | TokenKind::Newline + | TokenKind::EscapedNewline + | TokenKind::Whitespace + | TokenKind::EscapedWhitespace + | TokenKind::Plain => State::Plain, + + _ => State::Unresolved, + } + } +} + +impl Not for State { type Output = bool; fn not(self) -> Self::Output { - matches!(self, Self::Neither) + matches!(self, Self::Unresolved) } } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub(crate) struct RawToken<'input> { pub(crate) token: Token<'input>, - pub(crate) state: Resolved, + pub(crate) state: State, pub(crate) tail: Option>>, } impl<'input> RawToken<'input> { - pub(crate) fn new(token: Token<'input>) -> Self { - Self { + pub(crate) fn new(token: Token) -> RawToken { + let state = State::from(token.kind); + + RawToken { token, - state: Resolved::Neither, + state, tail: None, } } @@ -37,9 +61,9 @@ impl<'input> RawToken<'input> { fn order(&mut self) { if let Some(ref sec_part) = self.tail { match (self.state, sec_part.state) { - (Resolved::Open, Resolved::Close) - | (Resolved::Neither, Resolved::Close) - | (Resolved::Open, Resolved::Neither) => {} + (State::Open, State::Close) + | (State::Unresolved, State::Close) + | (State::Open, State::Unresolved) => {} _ => self.swap_parts(), } } @@ -74,25 +98,34 @@ impl<'input> RawToken<'input> { self.token = first; self.tail = Some(Box::new(RawToken { token: second, - state: Resolved::Neither, + state: State::Unresolved, tail: None, })); } - pub(crate) fn set_head_state(&mut self, state: Resolved) { + pub(crate) fn set_head_state(&mut self, state: State) { self.state = state; } - pub(crate) fn set_tail_state(&mut self, state: Resolved) { + pub(crate) fn set_tail_state(&mut self, state: State) { if let Some(tail) = self.tail.as_mut() { tail.state = state; } } - pub(crate) fn set_state(&mut self, state: Resolved) { + pub(crate) fn set_state(&mut self, state: State) { self.set_head_state(state); self.set_tail_state(state); } + + pub(crate) fn is_resolved(&self) -> bool { + let self_resolved = self.state != State::Unresolved; + + match self.tail.as_ref() { + Some(tail) => tail.is_resolved() && self_resolved, + None => self_resolved, + } + } } impl<'token> From> for Token<'token> { diff --git a/inline/src/lexer/token.rs b/inline/src/lexer/token.rs index 045d81c3..c6640401 100644 --- a/inline/src/lexer/token.rs +++ b/inline/src/lexer/token.rs @@ -3,7 +3,7 @@ use std::ops::{Add, AddAssign, Sub, SubAssign}; use unimarkup_commons::scanner::span::{Span, SpanLen}; use unimarkup_commons::scanner::{self, SymbolKind}; -use super::resolver::Resolved; +use super::resolver::State; use super::ContentOption; use crate::Inline; @@ -695,12 +695,12 @@ pub enum Spacing { None, } -impl From for Spacing { - fn from(resolved: Resolved) -> Self { +impl From for Spacing { + fn from(resolved: State) -> Self { match resolved { - Resolved::Open => Spacing::Pre, - Resolved::Close => Spacing::Post, - Resolved::Neither => Spacing::Both, + State::Open => Spacing::Pre, + State::Close => Spacing::Post, + State::Unresolved | State::Plain => Spacing::Both, } } } diff --git a/inline/tests/spec/markup/bold.yml b/inline/tests/spec/markup/bold.yml index a1368dc0..51a182f3 100644 --- a/inline/tests/spec/markup/bold.yml +++ b/inline/tests/spec/markup/bold.yml @@ -74,3 +74,23 @@ tests: html: | The next word is bold. + + - name: ambiguous-start + description: | + Bold that's started with an ambiguous token. + + input: | + The next ***word** is bold. + + html: | + The next *word is bold. + + - name: ambiguous-end + description: | + Bold that's ended with an ambiguous token. + + input: | + The next **word*** is bold. + + html: | + The next word* is bold. diff --git a/inline/tests/spec/snapshots/lexer/bold/ambiguous-end.snap b/inline/tests/spec/snapshots/lexer/bold/ambiguous-end.snap new file mode 100644 index 00000000..322690a8 --- /dev/null +++ b/inline/tests/spec/snapshots/lexer/bold/ambiguous-end.snap @@ -0,0 +1,38 @@ +--- +source: inline/tests/lexer/mod.rs +info: "Test 'ambiguous-end' from 'markup/bold.yml'" +--- +The next **word*** is bold. +The +^^^ -> Plain @ (1:1)->(1:4) + + ^ -> Whitespace @ (1:4)->(1:5) + next + ^^^^ -> Plain @ (1:5)->(1:9) + + ^ -> Whitespace @ (1:9)->(1:10) + ** + ^^ -> Bold @ (1:10)->(1:12) + word + ^^^^ -> Plain @ (1:12)->(1:16) + ** + ^^ -> Bold @ (1:16)->(1:18) + * + ^ -> Plain @ (1:18)->(1:19) + + ^ -> Whitespace @ (1:19)->(1:20) + is + ^^ -> Plain @ (1:20)->(1:22) + + ^ -> Whitespace @ (1:22)->(1:23) + bold. + ^^^^^ -> Plain @ (1:23)->(1:28) + ⏎ + ^ -> Newline @ (1:28)->(2:1) + +--- +With input: + +The next **word*** is bold. + + diff --git a/inline/tests/spec/snapshots/lexer/bold/ambiguous-start.snap b/inline/tests/spec/snapshots/lexer/bold/ambiguous-start.snap new file mode 100644 index 00000000..83237935 --- /dev/null +++ b/inline/tests/spec/snapshots/lexer/bold/ambiguous-start.snap @@ -0,0 +1,38 @@ +--- +source: inline/tests/lexer/mod.rs +info: "Test 'ambiguous-start' from 'markup/bold.yml'" +--- +The next ***word** is bold. +The +^^^ -> Plain @ (1:1)->(1:4) + + ^ -> Whitespace @ (1:4)->(1:5) + next + ^^^^ -> Plain @ (1:5)->(1:9) + + ^ -> Whitespace @ (1:9)->(1:10) + * + ^ -> Plain @ (1:10)->(1:11) + ** + ^^ -> Bold @ (1:11)->(1:13) + word + ^^^^ -> Plain @ (1:13)->(1:17) + ** + ^^ -> Bold @ (1:17)->(1:19) + + ^ -> Whitespace @ (1:19)->(1:20) + is + ^^ -> Plain @ (1:20)->(1:22) + + ^ -> Whitespace @ (1:22)->(1:23) + bold. + ^^^^^ -> Plain @ (1:23)->(1:28) + ⏎ + ^ -> Newline @ (1:28)->(2:1) + +--- +With input: + +The next ***word** is bold. + + diff --git a/inline/tests/spec/snapshots/parser/bold/ambiguous-end.snap b/inline/tests/spec/snapshots/parser/bold/ambiguous-end.snap new file mode 100644 index 00000000..925e9311 --- /dev/null +++ b/inline/tests/spec/snapshots/parser/bold/ambiguous-end.snap @@ -0,0 +1,25 @@ +--- +source: inline/tests/parser/mod.rs +info: "Test 'ambiguous-end' from 'markup/bold.yml'" +--- +Plain @ (1:1)->(1:10) ( + The next + ^^^^^^^^^ +) +Bold @ (1:10)->(1:18) ( + Plain @ (1:12)->(1:16) ( + word + ^^^^ + ) +) +Plain @ (1:18)->(1:28) ( + * is bold. + ^^^^^^^^^^ +) + +--- +With input: + +The next **word*** is bold. + + diff --git a/inline/tests/spec/snapshots/parser/bold/ambiguous-start.snap b/inline/tests/spec/snapshots/parser/bold/ambiguous-start.snap new file mode 100644 index 00000000..c0aa4886 --- /dev/null +++ b/inline/tests/spec/snapshots/parser/bold/ambiguous-start.snap @@ -0,0 +1,25 @@ +--- +source: inline/tests/parser/mod.rs +info: "Test 'ambiguous-start' from 'markup/bold.yml'" +--- +Plain @ (1:1)->(1:11) ( + The next * + ^^^^^^^^^^ +) +Bold @ (1:11)->(1:19) ( + Plain @ (1:13)->(1:17) ( + word + ^^^^ + ) +) +Plain @ (1:19)->(1:28) ( + is bold. + ^^^^^^^^^ +) + +--- +With input: + +The next ***word** is bold. + +