diff --git a/lectern/extract.py b/lectern/extract.py index 72ece61..18435ec 100644 --- a/lectern/extract.py +++ b/lectern/extract.py @@ -12,7 +12,6 @@ import re from dataclasses import replace from itertools import islice -from pathlib import Path from typing import ( Any, Callable, @@ -26,7 +25,6 @@ Tuple, Union, ) -from urllib.parse import urlparse from beet import Cache, DataPack, ResourcePack from beet.core.utils import FileSystemPath @@ -175,9 +173,6 @@ def create_fragment( start_line: int, end_line: int, match: "re.Match[str]", - content: Optional[str] = None, - url: Optional[str] = None, - path: Optional[FileSystemPath] = None, ): """Helper for creating a fragment from a matched pattern.""" directive, modifier, arguments = match.groups() @@ -187,9 +182,6 @@ def create_fragment( directive=directive, modifier=modifier, arguments=arguments.split(), - content=content, - url=url, - path=path, cache=self.cache, ) @@ -319,33 +311,21 @@ def parse_fragments( # ``` # if ( - ( - skip_to := self.match_tokens( - tokens[i : i + 4], - "paragraph_open", - "inline", - "paragraph_close", - ["fence", "code_block"], - ) + skip_to := self.match_tokens( + tokens[i : i + 4], + "paragraph_open", + "inline", + "paragraph_close", + ["fence", "code_block"], ) - and (inline := tokens[i + 1]) - and inline.children - and self.match_tokens(inline.children, "code_inline") - and ( - match := regex.match(inline := inline.children[0].content) - or ( - (directory := RELATIVE_PATH_REGEX.match(inline)) - and ( - directive := "@resource_pack" - if directory[0] == "assets" - else "@data_pack" - ) - and regex.match(f"{directive} {inline}") - ) + ) and ( + fragments := list( + self.parse_inline(tokens[i + 1], directives, external_files) ) ): - yield self.create_fragment( - current_line, skip_to, match, content=tokens[i + 3].content + yield from fragments[:-1] + yield replace( + fragments[-1].with_content(tokens[i + 3].content), end_line=skip_to ) # @@ -365,17 +345,19 @@ def parse_fragments( "paragraph_close", ) ) - and (inline := tokens[i + 1]) - and inline.children - and self.match_tokens(inline.children, "code_inline") and (image := tokens[i + 4]) and image.children and self.match_tokens(image.children, "image") and (link := image.children[0].attrGet("src")) - and (match := regex.match(inline.children[0].content)) + and ( + fragments := list( + self.parse_inline(tokens[i + 1], directives, external_files) + ) + ) ): - yield self.create_link_fragment( - current_line, skip_to, match, link, external_files + yield from fragments[:-1] + yield replace( + fragments[-1].with_link(link, external_files), end_line=skip_to ) # @@ -401,15 +383,17 @@ def parse_fragments( "html_block", ) ) - and (inline := tokens[i + 1]) - and inline.children - and self.match_tokens(inline.children, "code_inline") and tokens[i + 3].content == "
\n" and tokens[i + 5].content == "
\n" - and (match := regex.match(inline.children[0].content)) + and ( + fragments := list( + self.parse_inline(tokens[i + 1], directives, external_files) + ) + ) ): - yield self.create_fragment( - current_line, skip_to, match, content=tokens[i + 4].content + yield from fragments[:-1] + yield replace( + fragments[-1].with_content(tokens[i + 4].content), end_line=skip_to ) # @@ -435,46 +419,21 @@ def parse_fragments( "html_block", ) ) - and (inline := tokens[i + 1]) - and inline.children - and self.match_tokens(inline.children, "code_inline") and tokens[i + 3].content == "
\n" and tokens[i + 7].content == "
\n" and (image := tokens[i + 5]) and image.children and self.match_tokens(image.children, "image") and (link := image.children[0].attrGet("src")) - and (match := regex.match(inline.children[0].content)) - ): - yield self.create_link_fragment( - current_line, skip_to, match, link, external_files - ) - - # - # [`@directive args...`](path/to/content) - # - elif ( - ( - skip_to := self.match_tokens( - tokens[i : i + 3], - "paragraph_open", - "inline", - "paragraph_close", + and ( + fragments := list( + self.parse_inline(tokens[i + 1], directives, external_files) ) ) - and (inline := tokens[i + 1]) - and inline.children - and self.match_tokens( - inline.children, - "link_open", - "code_inline", - "link_close", - ) - and (link := inline.children[0].attrGet("href")) - and (match := regex.match(inline.children[1].content)) ): - yield self.create_link_fragment( - current_line, skip_to, match, link, external_files + yield from fragments[:-1] + yield replace( + fragments[-1].with_link(link, external_files), end_line=skip_to ) # @@ -495,8 +454,8 @@ def parse_fragments( and (comment := self.html_comment_regex.match(token.content)) and (match := regex.match(comment.group(1))) ): - yield self.create_fragment( - current_line, skip_to, match, content=tokens[i + 1].content + yield self.create_fragment(current_line, skip_to, match).with_content( + tokens[i + 1].content ) # @@ -521,28 +480,18 @@ def parse_fragments( and (link := image.children[0].attrGet("src")) and (match := regex.match(comment.group(1))) ): - yield self.create_link_fragment( - current_line, skip_to, match, link, external_files + yield self.create_fragment(current_line, skip_to, match).with_link( + link, external_files ) # + # [`@directive args...`](path/to/content) # `@directive args...` # - elif ( - ( - skip_to := self.match_tokens( - tokens[i : i + 3], - "paragraph_open", - "inline", - "paragraph_close", - ) - ) - and (inline := tokens[i + 1]) - and inline.children - and self.match_tokens(inline.children, "code_inline") - and (match := regex.match(inline.children[0].content)) + elif (skip_to := self.match_tokens([token], "inline")) and ( + fragments := list(self.parse_inline(token, directives, external_files)) ): - yield self.create_fragment(current_line, skip_to, match) + yield from fragments # # @@ -598,6 +547,58 @@ def parse_fragments( end_line=(skip_to := fragment.end_line + current_line), ) + def parse_inline( + self, + token: Token, + directives: Mapping[str, Directive], + external_files: Optional[FileSystemPath] = None, + ) -> Iterator[Fragment]: + if token.type == "inline" and token.children and token.map: + regex = self.get_regex(directives) + + it = iter(enumerate(token.children)) + + for i, child in it: + if ( + self.match_newline(token.children, i - 1) + and self.match_newline(token.children, i + 1) + and child.type == "code_inline" + ): + code = child + elif ( + self.match_newline(token.children, i - 1) + and self.match_newline(token.children, i + 3) + and self.match_tokens( + token.children[i : i + 3], + "link_open", + "code_inline", + "link_close", + ) + ): + code = token.children[i + 1] + next(it) + next(it) + else: + continue + + if match := ( + regex.match(code.content) + or ( + (directory := RELATIVE_PATH_REGEX.match(code.content)) + and ( + directive := "@resource_pack" + if directory[0] == "assets" + else "@data_pack" + ) + and regex.match(f"{directive} {code.content}") + ) + ): + start_line, end_line = token.map + fragment = self.create_fragment(start_line, end_line, match) + if child.type == "link_open" and (link := child.attrGet("href")): + fragment = fragment.with_link(link, external_files) + yield fragment + def match_tokens( self, tokens: Optional[List[Token]], @@ -618,21 +619,10 @@ def match_tokens( and next((token.map[-1] for token in reversed(tokens) if token.map), 1) # type: ignore ) - def create_link_fragment( - self, - start_line: int, - end_line: int, - match: "re.Match[str]", - link: Any, - external_files: Optional[FileSystemPath] = None, - ) -> Fragment: - """Helper for creating a fragment from a link.""" - url = str(link) - path = None - - if urlparse(url).path == url: - if external_files: - path = Path(external_files, url).resolve() - url = None - - return self.create_fragment(start_line, end_line, match, url=url, path=path) + def match_newline(self, tokens: Optional[List[Token]], index: int) -> bool: + return ( + not tokens + or index < 0 + or index >= len(tokens) + or tokens[index].type in ("softbreak", "hardbreak") + ) diff --git a/lectern/fragment.py b/lectern/fragment.py index 786d4a0..b5671aa 100644 --- a/lectern/fragment.py +++ b/lectern/fragment.py @@ -5,8 +5,10 @@ from base64 import b64decode -from dataclasses import dataclass +from dataclasses import dataclass, replace +from pathlib import Path from typing import Any, Optional, Sequence, Type, TypeVar, overload +from urllib.parse import urlparse from urllib.request import urlopen from beet import BinaryFile, BinaryFileBase, BubbleException, Cache, File @@ -45,8 +47,28 @@ class Fragment: file: Optional[File[Any, Any]] = None cache: Optional[Cache] = None + def with_content(self, content: str) -> "Fragment": + """Replace content.""" + return replace(self, content=content) + + def with_link( + self, + link: Any, + external_files: Optional[FileSystemPath] = None, + ) -> "Fragment": + """Replace linked content.""" + url = str(link) + path = None + + if urlparse(url).path == url: + if external_files: + path = Path(external_files, url).resolve() + url = None + + return replace(self, url=url, path=path) + @overload - def expect(self): + def expect(self) -> None: ... @overload diff --git a/tests/test_document.py b/tests/test_document.py index 2174c30..f053dc2 100644 --- a/tests/test_document.py +++ b/tests/test_document.py @@ -63,6 +63,20 @@ def test_text_tricky(): assert len(doc.data.functions) == 2 +def test_markdown_breaks(): + doc = Document() + doc.directives["dummy"] = lambda fragment, _, data: data.functions.update( + {fragment.expect("full_name"): Function(["say dummy"])} + ) + doc.add( + "same paragraph\n`@dummy demo:dummy`\n`@function demo:foo`\n```\nsay foo\n```\n" + ) + assert doc.data.functions == { + "demo:dummy": Function(["say dummy"]), + "demo:foo": Function(["say foo"]), + } + + def test_missing_argument(): with pytest.raises( InvalidFragment, match="Missing argument 'full_name' for directive @function."