Skip to content

Commit

Permalink
Use a line_offset vector to track offsets for inline sourcepos
Browse files Browse the repository at this point in the history
  • Loading branch information
digitalmoksha committed Aug 17, 2024
1 parent 9f4d391 commit feaf5cf
Show file tree
Hide file tree
Showing 8 changed files with 149 additions and 44 deletions.
14 changes: 7 additions & 7 deletions src/cm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -544,13 +544,13 @@ impl<'a, 'o, 'c> CommonMarkFormatter<'a, 'o, 'c> {
let info = ncb.info.as_bytes();
let literal = ncb.literal.as_bytes();

if info.is_empty()
&& (literal.len() > 2
&& !isspace(literal[0])
&& !(isspace(literal[literal.len() - 1])
&& isspace(literal[literal.len() - 2])))
&& !first_in_list_item
&& !self.options.render.prefer_fenced
#[allow(clippy::len_zero)]
if !(info.len() > 0
|| literal.len() <= 2
|| isspace(literal[0])
|| first_in_list_item
|| self.options.render.prefer_fenced
|| isspace(literal[literal.len() - 1]) && isspace(literal[literal.len() - 2]))
{
write!(self, " ").unwrap();
write!(self.prefix, " ").unwrap();
Expand Down
2 changes: 2 additions & 0 deletions src/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ pub struct Ast {
pub(crate) open: bool,
pub(crate) last_line_blank: bool,
pub(crate) table_visited: bool,
pub(crate) line_offsets: Vec<usize>,
}

/// Represents the position in the source Markdown this node was rendered from.
Expand Down Expand Up @@ -609,6 +610,7 @@ impl Ast {
open: true,
last_line_blank: false,
table_visited: false,
line_offsets: Vec::with_capacity(0),
}
}
}
Expand Down
13 changes: 5 additions & 8 deletions src/parser/autolink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,11 @@ pub(crate) fn process_autolinks<'a>(
}
}

match contents[i] {
b'@' => {
post_org = email_match(arena, contents, i, relaxed_autolinks);
if post_org.is_some() {
break;
}
if contents[i] == b'@' {
post_org = email_match(arena, contents, i, relaxed_autolinks);
if post_org.is_some() {
break;
}
_ => (),
}
i += 1;
}
Expand Down Expand Up @@ -161,7 +158,7 @@ fn check_domain(data: &[u8], allow_short: bool) -> Option<usize> {
}

fn is_valid_hostchar(ch: char) -> bool {
!ch.is_whitespace() && !(ch.is_punctuation() || ch.is_symbol())
!(ch.is_whitespace() || ch.is_punctuation() || ch.is_symbol())
}

fn autolink_delim(data: &[u8], mut link_end: usize, relaxed_autolinks: bool) -> usize {
Expand Down
20 changes: 13 additions & 7 deletions src/parser/inlines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ pub struct Subject<'a: 'd, 'r, 'o, 'c, 'd, 'i> {
pub input: &'i [u8],
line: usize,
pub pos: usize,
block_offset: usize,
column_offset: isize,
line_offset: usize,
flags: Flags,
pub refmap: &'r mut RefMap,
delimiter_arena: &'d Arena<Delimiter<'a, 'd>>,
Expand Down Expand Up @@ -116,7 +116,6 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
options: &'o Options<'c>,
input: &'i [u8],
line: usize,
block_offset: usize,
refmap: &'r mut RefMap,
delimiter_arena: &'d Arena<Delimiter<'a, 'd>>,
) -> Self {
Expand All @@ -126,8 +125,8 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
input,
line,
pos: 0,
block_offset,
column_offset: 0,
line_offset: 0,
flags: Flags::default(),
refmap,
delimiter_arena,
Expand Down Expand Up @@ -182,6 +181,11 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
None => return false,
Some(ch) => *ch as char,
};

let node_ast = node.data.borrow();
let adjusted_line = self.line - node_ast.sourcepos.start.line;
self.line_offset = node_ast.line_offsets[adjusted_line];

let new_inl: Option<&'a AstNode<'a>> = match c {
'\0' => return false,
'\r' | '\n' => Some(self.handle_newline()),
Expand Down Expand Up @@ -1604,7 +1608,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
inl.data.borrow_mut().sourcepos.start.column =
bracket_inl_text.data.borrow().sourcepos.start.column;
inl.data.borrow_mut().sourcepos.end.column = usize::try_from(
self.pos as isize + self.column_offset + self.block_offset as isize,
self.pos as isize + self.column_offset + self.line_offset as isize,
)
.unwrap();
bracket_inl_text.insert_before(inl);
Expand Down Expand Up @@ -1655,7 +1659,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
.sourcepos
.start;
inl.data.borrow_mut().sourcepos.end.column =
usize::try_from(self.pos as isize + self.column_offset + self.block_offset as isize)
usize::try_from(self.pos as isize + self.column_offset + self.line_offset as isize)
.unwrap();

self.brackets[brackets_len - 1].inl_text.insert_before(inl);
Expand Down Expand Up @@ -1847,8 +1851,8 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
end_column: usize,
) -> &'a AstNode<'a> {
let start_column =
start_column as isize + 1 + self.column_offset + self.block_offset as isize;
let end_column = end_column as isize + 1 + self.column_offset + self.block_offset as isize;
start_column as isize + 1 + self.column_offset + self.line_offset as isize;
let end_column = end_column as isize + 1 + self.column_offset + self.line_offset as isize;

let ast = Ast {
value,
Expand All @@ -1864,6 +1868,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
open: false,
last_line_blank: false,
table_visited: false,
line_offsets: Vec::with_capacity(0),
};
self.arena.alloc(Node::new(RefCell::new(ast)))
}
Expand Down Expand Up @@ -1972,6 +1977,7 @@ pub fn make_inline<'a>(
open: false,
last_line_blank: false,
table_visited: false,
line_offsets: Vec::with_capacity(0),
};
arena.alloc(Node::new(RefCell::new(ast)))
}
Expand Down
8 changes: 6 additions & 2 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ pub fn parse_document<'a>(
open: true,
last_line_blank: false,
table_visited: false,
line_offsets: Vec::with_capacity(0),
})));
let mut parser = Parser::new(arena, root, options);
let mut linebuf = Vec::with_capacity(buffer.len());
Expand Down Expand Up @@ -1998,6 +1999,11 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> {
}
}
if self.offset < line.len() {
// since whitespace is stripped off the beginning of lines, we need to keep
// track of how much was stripped off. This allows us to properly calculate
// inline sourcepos during inline processing.
ast.line_offsets.push(self.offset);

ast.content
.push_str(str::from_utf8(&line[self.offset..]).unwrap());
}
Expand Down Expand Up @@ -2185,7 +2191,6 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> {
self.options,
content,
node_data.sourcepos.start.line,
node_data.sourcepos.start.column - 1 + node_data.internal_offset,
&mut self.refmap,
&delimiter_arena,
);
Expand Down Expand Up @@ -2439,7 +2444,6 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> {
self.options,
content,
0, // XXX -1 in upstream; never used?
0,
&mut self.refmap,
&delimiter_arena,
);
Expand Down
24 changes: 14 additions & 10 deletions src/parser/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ fn try_opening_header<'a>(
start.column_add((cell.end_offset - header_row.paragraph_offset) as isize);
ast.internal_offset = cell.internal_offset;
ast.content.clone_from(&cell.content);
ast.line_offsets.push(
start.column + cell.start_offset - 1 + cell.internal_offset
- header_row.paragraph_offset,
);

i += 1;
}
Expand Down Expand Up @@ -172,6 +176,9 @@ fn try_opening_row<'a>(
cell_ast.internal_offset = cell.internal_offset;
cell_ast.sourcepos.end.column = sourcepos.start.column + cell.end_offset;
cell_ast.content.clone_from(&cell.content);
cell_ast
.line_offsets
.push(sourcepos.start.column + cell.start_offset - 1 + cell.internal_offset);

last_column = cell_ast.sourcepos.end.column;

Expand Down Expand Up @@ -295,16 +302,13 @@ fn try_inserting_table_header_paragraph<'a>(
let mut paragraph = Ast::new(NodeValue::Paragraph, start);
paragraph.sourcepos.end.line = start.line + newlines - 1;

// XXX We don't have the last_line_length to go on by this point,
// so we have no idea what the end column should be.
// We can't track it in row() like we do paragraph_offset, because
// we've already discarded the leading whitespace for that line.
// This is hard to avoid with this backtracking approach to
// creating the pre-table paragraph — we're doing the work of
// finalize() here, but without the parser state at that time.
// Approximate by just counting the line length as it is and adding
// to the start column.
paragraph.sourcepos.end.column = start.column - 1
// copy over the line offsets related to the paragraph
for n in 0..newlines {
paragraph.line_offsets.push(container_ast.line_offsets[n]);
}

let last_line_offset = *paragraph.line_offsets.last().unwrap_or(&0);
paragraph.sourcepos.end.column = last_line_offset
+ preface
.iter()
.rev()
Expand Down
104 changes: 100 additions & 4 deletions src/tests/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -529,8 +529,6 @@ fn link_sourcepos_newline() {
);
}

// Ignored per https://github.com/kivikakk/comrak/pull/439#issuecomment-2225129960.
#[ignore]
#[test]
fn link_sourcepos_truffle() {
assert_ast_match!(
Expand Down Expand Up @@ -577,8 +575,6 @@ fn link_sourcepos_truffle_twist() {
);
}

// Ignored per https://github.com/kivikakk/comrak/pull/439#issuecomment-2225129960.
#[ignore]
#[test]
fn link_sourcepos_truffle_bergamot() {
assert_ast_match!(
Expand All @@ -601,3 +597,103 @@ fn link_sourcepos_truffle_bergamot() {
])
);
}

// Inline sourcepos for a two-line paragraph in which each line starts with
// leading whitespace. The text on each line is expected at its own column
// ("A" at 1:3, "B" at 2:4), with the softbreak directly after "A" at 1:4.
#[test]
fn link_sourcepos_inline_paragraph_multiline() {
assert_ast_match!(
[],
" A\n"
" B\n",
(document (1:1-2:4) [
(paragraph (1:3-2:4) [
(text (1:3-1:3) "A")
(softbreak (1:4-1:4))
(text (2:4-2:4) "B")
])
])
);
}

// Inline sourcepos for a list item whose paragraph continues onto a second
// line that carries no list indentation. "A" follows the list marker at 1:3,
// while "B" is expected at the very start of its line (2:1) and still belongs
// to the item's paragraph.
#[test]
fn link_sourcepos_inline_listitem_multiline() {
assert_ast_match!(
[],
"- A\n"
"B\n",
(document (1:1-2:1) [
(list (1:1-2:1) [
(item (1:1-2:1) [
(paragraph (1:3-2:1) [
(text (1:3-1:3) "A")
(softbreak (1:4-1:4))
(text (2:1-2:1) "B")
])
])
])
])
);
}

// Inline sourcepos for a list with two items, each holding a two-line
// paragraph. Every line is expected to report its own start column for the
// text it contains: "A" at 1:3, "B" at 2:4, "C" at 3:4 and "D" at 4:2.
// The input has no trailing newline, so the document ends at 4:2.
#[test]
fn link_sourcepos_inline_listitem_multiline_2() {
assert_ast_match!(
[],
"- A\n"
" B\n"
"- C\n"
" D",
(document (1:1-4:2) [
(list (1:1-4:2) [
(item (1:1-2:4) [
(paragraph (1:3-2:4) [
(text (1:3-1:3) "A")
(softbreak (1:4-1:4))
(text (2:4-2:4) "B")
])
])
(item (3:1-4:2) [
(paragraph (3:4-4:2) [
(text (3:4-3:4) "C")
(softbreak (3:5-3:5))
(text (4:2-4:2) "D")
])
])
])
])
);
}

// Inline sourcepos for strong emphasis nested inside emphasis, written with
// mixed delimiters (`_**this**_`). The emph node is expected to span the whole
// line (1:1-1:10), the strong node to sit one column inside it (1:2-1:9), and
// the text to cover only the word itself (1:4-1:7).
#[test]
fn link_sourcepos_inline_double_emphasis_1() {
assert_ast_match!(
[],
"_**this**_\n",
(document (1:1-1:10) [
(paragraph (1:1-1:10) [
(emph (1:1-1:10) [
(strong (1:2-1:9) [
(text (1:4-1:7) "this")
])
])
])
])
);
}

// Same expected tree and positions as the `_**this**_` case, but with the
// nested emphasis written as a single run of three underscores on each side
// (`___this___`).
// NOTE(review): this test is marked #[ignore] and the reason is not recorded
// here; presumably these positions are not produced yet for this delimiter
// form. Confirm before enabling.
#[ignore]
#[test]
fn link_sourcepos_inline_double_emphasis_2() {
assert_ast_match!(
[],
"___this___\n",
(document (1:1-1:10) [
(paragraph (1:1-1:10) [
(emph (1:1-1:10) [
(strong (1:2-1:9) [
(text (1:4-1:7) "this")
])
])
])
])
);
}
8 changes: 2 additions & 6 deletions src/tests/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,14 +192,10 @@ fn sourcepos_with_preceding_para_offset() {
" | c | d |\n"
,
(document (1:1-5:10) [

// XXX This should be 1:2-2:5; see
// crate::parser::table::try_inserting_table_header_paragraph.
(paragraph (1:2-2:4) [

(paragraph (1:2-2:5) [
(text (1:2-1:4) "123")
(softbreak (1:5-1:5))
(text (2:2-2:4) "456")
(text (2:3-2:5) "456")
])
(table (3:2-5:10) [
(table_row (3:2-3:10) [
Expand Down

0 comments on commit feaf5cf

Please sign in to comment.