Skip to content

Commit

Permalink
feat: allow multiple directives in a paragraph
Browse files Browse the repository at this point in the history
  • Loading branch information
vberlier committed Dec 17, 2023
1 parent a62a015 commit d43f32e
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 113 deletions.
212 changes: 101 additions & 111 deletions lectern/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import re
from dataclasses import replace
from itertools import islice
from pathlib import Path
from typing import (
Any,
Callable,
Expand All @@ -26,7 +25,6 @@
Tuple,
Union,
)
from urllib.parse import urlparse

from beet import Cache, DataPack, ResourcePack
from beet.core.utils import FileSystemPath
Expand Down Expand Up @@ -175,9 +173,6 @@ def create_fragment(
start_line: int,
end_line: int,
match: "re.Match[str]",
content: Optional[str] = None,
url: Optional[str] = None,
path: Optional[FileSystemPath] = None,
):
"""Helper for creating a fragment from a matched pattern."""
directive, modifier, arguments = match.groups()
Expand All @@ -187,9 +182,6 @@ def create_fragment(
directive=directive,
modifier=modifier,
arguments=arguments.split(),
content=content,
url=url,
path=path,
cache=self.cache,
)

Expand Down Expand Up @@ -319,33 +311,21 @@ def parse_fragments(
# ```
#
if (
(
skip_to := self.match_tokens(
tokens[i : i + 4],
"paragraph_open",
"inline",
"paragraph_close",
["fence", "code_block"],
)
skip_to := self.match_tokens(
tokens[i : i + 4],
"paragraph_open",
"inline",
"paragraph_close",
["fence", "code_block"],
)
and (inline := tokens[i + 1])
and inline.children
and self.match_tokens(inline.children, "code_inline")
and (
match := regex.match(inline := inline.children[0].content)
or (
(directory := RELATIVE_PATH_REGEX.match(inline))
and (
directive := "@resource_pack"
if directory[0] == "assets"
else "@data_pack"
)
and regex.match(f"{directive} {inline}")
)
) and (
fragments := list(
self.parse_inline(tokens[i + 1], directives, external_files)
)
):
yield self.create_fragment(
current_line, skip_to, match, content=tokens[i + 3].content
yield from fragments[:-1]
yield replace(
fragments[-1].with_content(tokens[i + 3].content), end_line=skip_to
)

#
Expand All @@ -365,17 +345,19 @@ def parse_fragments(
"paragraph_close",
)
)
and (inline := tokens[i + 1])
and inline.children
and self.match_tokens(inline.children, "code_inline")
and (image := tokens[i + 4])
and image.children
and self.match_tokens(image.children, "image")
and (link := image.children[0].attrGet("src"))
and (match := regex.match(inline.children[0].content))
and (
fragments := list(
self.parse_inline(tokens[i + 1], directives, external_files)
)
)
):
yield self.create_link_fragment(
current_line, skip_to, match, link, external_files
yield from fragments[:-1]
yield replace(
fragments[-1].with_link(link, external_files), end_line=skip_to
)

#
Expand All @@ -401,15 +383,17 @@ def parse_fragments(
"html_block",
)
)
and (inline := tokens[i + 1])
and inline.children
and self.match_tokens(inline.children, "code_inline")
and tokens[i + 3].content == "<details>\n"
and tokens[i + 5].content == "</details>\n"
and (match := regex.match(inline.children[0].content))
and (
fragments := list(
self.parse_inline(tokens[i + 1], directives, external_files)
)
)
):
yield self.create_fragment(
current_line, skip_to, match, content=tokens[i + 4].content
yield from fragments[:-1]
yield replace(
fragments[-1].with_content(tokens[i + 4].content), end_line=skip_to
)

#
Expand All @@ -435,46 +419,21 @@ def parse_fragments(
"html_block",
)
)
and (inline := tokens[i + 1])
and inline.children
and self.match_tokens(inline.children, "code_inline")
and tokens[i + 3].content == "<details>\n"
and tokens[i + 7].content == "</details>\n"
and (image := tokens[i + 5])
and image.children
and self.match_tokens(image.children, "image")
and (link := image.children[0].attrGet("src"))
and (match := regex.match(inline.children[0].content))
):
yield self.create_link_fragment(
current_line, skip_to, match, link, external_files
)

#
# [`@directive args...`](path/to/content)
#
elif (
(
skip_to := self.match_tokens(
tokens[i : i + 3],
"paragraph_open",
"inline",
"paragraph_close",
and (
fragments := list(
self.parse_inline(tokens[i + 1], directives, external_files)
)
)
and (inline := tokens[i + 1])
and inline.children
and self.match_tokens(
inline.children,
"link_open",
"code_inline",
"link_close",
)
and (link := inline.children[0].attrGet("href"))
and (match := regex.match(inline.children[1].content))
):
yield self.create_link_fragment(
current_line, skip_to, match, link, external_files
yield from fragments[:-1]
yield replace(
fragments[-1].with_link(link, external_files), end_line=skip_to
)

#
Expand All @@ -495,8 +454,8 @@ def parse_fragments(
and (comment := self.html_comment_regex.match(token.content))
and (match := regex.match(comment.group(1)))
):
yield self.create_fragment(
current_line, skip_to, match, content=tokens[i + 1].content
yield self.create_fragment(current_line, skip_to, match).with_content(
tokens[i + 1].content
)

#
Expand All @@ -521,28 +480,18 @@ def parse_fragments(
and (link := image.children[0].attrGet("src"))
and (match := regex.match(comment.group(1)))
):
yield self.create_link_fragment(
current_line, skip_to, match, link, external_files
yield self.create_fragment(current_line, skip_to, match).with_link(
link, external_files
)

#
# [`@directive args...`](path/to/content)
# `@directive args...`
#
elif (
(
skip_to := self.match_tokens(
tokens[i : i + 3],
"paragraph_open",
"inline",
"paragraph_close",
)
)
and (inline := tokens[i + 1])
and inline.children
and self.match_tokens(inline.children, "code_inline")
and (match := regex.match(inline.children[0].content))
elif (skip_to := self.match_tokens([token], "inline")) and (
fragments := list(self.parse_inline(token, directives, external_files))
):
yield self.create_fragment(current_line, skip_to, match)
yield from fragments

#
# <!-- @directive args... -->
Expand Down Expand Up @@ -598,6 +547,58 @@ def parse_fragments(
end_line=(skip_to := fragment.end_line + current_line),
)

def parse_inline(
    self,
    token: Token,
    directives: Mapping[str, Directive],
    external_files: Optional[FileSystemPath] = None,
) -> Iterator[Fragment]:
    """Yield a fragment for each directive found on its own line of an inline token.

    A candidate is either a lone inline code span or an inline code span
    wrapped in a link, and it must be delimited on both sides by a line
    break or by the start/end of the children list. Nothing is yielded when
    the token is not an ``inline`` token, has no children, or has no line map.

    Every fragment spans the line range of the whole inline token. When the
    candidate is a link with an ``href``, the fragment is linked to it.
    """
    if token.type != "inline" or not token.children or not token.map:
        return

    children = token.children
    pattern = self.get_regex(directives)

    # Index of the next child worth examining; lets us hop over the
    # remaining tokens of a link once it has been consumed.
    resume_at = 0

    for index, node in enumerate(children):
        if index < resume_at:
            continue

        # Both accepted shapes must begin right after a line break.
        if not self.match_newline(children, index - 1):
            continue

        if node.type == "code_inline" and self.match_newline(children, index + 1):
            code = node
        elif self.match_newline(children, index + 3) and self.match_tokens(
            children[index : index + 3],
            "link_open",
            "code_inline",
            "link_close",
        ):
            code = children[index + 1]
            # Skip the code span and the closing link token.
            resume_at = index + 3
        else:
            continue

        found = pattern.match(code.content)
        if not found:
            # Fall back to treating the text as a bare path: pick the
            # directive from what RELATIVE_PATH_REGEX matched and retry.
            directory = RELATIVE_PATH_REGEX.match(code.content)
            if directory:
                if directory[0] == "assets":
                    directive = "@resource_pack"
                else:
                    directive = "@data_pack"
                found = pattern.match(f"{directive} {code.content}")
        if not found:
            continue

        start_line, end_line = token.map
        fragment = self.create_fragment(start_line, end_line, found)
        if node.type == "link_open":
            href = node.attrGet("href")
            if href:
                fragment = fragment.with_link(href, external_files)
        yield fragment

def match_tokens(
self,
tokens: Optional[List[Token]],
Expand All @@ -618,21 +619,10 @@ def match_tokens(
and next((token.map[-1] for token in reversed(tokens) if token.map), 1) # type: ignore
)

def create_link_fragment(
    self,
    start_line: int,
    end_line: int,
    match: "re.Match[str]",
    link: Any,
    external_files: Optional[FileSystemPath] = None,
) -> Fragment:
    """Build a fragment whose content is referenced by a link.

    The link is stringified. If it consists of nothing but a path component
    it is treated as a local file: the fragment gets no url, and its path is
    resolved against ``external_files`` when that is provided (otherwise the
    path is ``None`` as well). Any other link is passed through as the url.
    """
    target = str(link)

    if urlparse(target).path != target:
        return self.create_fragment(
            start_line, end_line, match, url=target, path=None
        )

    local = Path(external_files, target).resolve() if external_files else None
    return self.create_fragment(start_line, end_line, match, url=None, path=local)
def match_newline(self, tokens: Optional[List[Token]], index: int) -> bool:
    """Tell whether position ``index`` counts as a line boundary.

    A missing or empty token list and any out-of-range index are treated as
    boundaries; otherwise the token at ``index`` must be a soft or hard break.
    """
    if not tokens:
        return True
    if not 0 <= index < len(tokens):
        return True
    return tokens[index].type in ("softbreak", "hardbreak")
26 changes: 24 additions & 2 deletions lectern/fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@


from base64 import b64decode
from dataclasses import dataclass
from dataclasses import dataclass, replace
from pathlib import Path
from typing import Any, Optional, Sequence, Type, TypeVar, overload
from urllib.parse import urlparse
from urllib.request import urlopen

from beet import BinaryFile, BinaryFileBase, BubbleException, Cache, File
Expand Down Expand Up @@ -45,8 +47,28 @@ class Fragment:
file: Optional[File[Any, Any]] = None
cache: Optional[Cache] = None

def with_content(self, content: str) -> "Fragment":
    """Return a copy of this fragment with ``content`` swapped in."""
    updated = replace(self, content=content)
    return updated

def with_link(
    self,
    link: Any,
    external_files: Optional[FileSystemPath] = None,
) -> "Fragment":
    """Return a copy of this fragment pointing at linked content.

    The link is stringified. If it consists of nothing but a path component
    it is treated as a local file: the url is cleared, and the path is
    resolved against ``external_files`` when that is provided (otherwise the
    path is ``None`` too). Any other link is stored as the url with no path.
    """
    target = str(link)

    if urlparse(target).path != target:
        return replace(self, url=target, path=None)

    local = Path(external_files, target).resolve() if external_files else None
    return replace(self, url=None, path=local)

@overload
def expect(self):
def expect(self) -> None:
...

@overload
Expand Down
14 changes: 14 additions & 0 deletions tests/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,20 @@ def test_text_tricky():
assert len(doc.data.functions) == 2


def test_markdown_breaks():
    """Two directives on consecutive lines of one paragraph are both applied."""

    def register_dummy(fragment, _, data):
        # Custom directive: registers a fixed function under the given name.
        return data.functions.update(
            {fragment.expect("full_name"): Function(["say dummy"])}
        )

    markdown = "same paragraph\n`@dummy demo:dummy`\n`@function demo:foo`\n```\nsay foo\n```\n"
    expected = {
        "demo:dummy": Function(["say dummy"]),
        "demo:foo": Function(["say foo"]),
    }

    doc = Document()
    doc.directives["dummy"] = register_dummy
    doc.add(markdown)

    assert doc.data.functions == expected


def test_missing_argument():
with pytest.raises(
InvalidFragment, match="Missing argument 'full_name' for directive @function."
Expand Down

0 comments on commit d43f32e

Please sign in to comment.