Skip to content

Commit

Permalink
release v0.2.5
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhaopudark committed Jan 25, 2024
1 parent ef61c56 commit c404684
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 22 deletions.
4 changes: 4 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Pandoc-Filter 0.2.5
## Release 0.2.5
Reorganize the internal implementation of URL decoding.

# Pandoc-Filter 0.2.4
## Release 0.2.4
Fix a bug in `upload_figure_to_aliyun_filter` when local file path contains spaces.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
import panflute as pf

from ...utils import TracingLogger,DocRuntimeDict,InternalLink
from ...utils import get_html_id,sub_html_id,get_html_href,get_text_hash
from ..md2md.norm_internal_link import _decode_internal_link_url
from ...utils import get_html_id,sub_html_id,get_html_href,get_text_hash,decode_internal_link_url


r"""A pandoc filter mainly for converting `markdown` to `html`.
Expand Down Expand Up @@ -109,7 +108,7 @@ def _url_hash_guess(text:str)->str:

if isinstance(elem, pf.Link) and elem.url.startswith('#'):
# Only md internal links need to be decoded, since pandoc encodes them before the filter runs.
decoded_url = _decode_internal_link_url(elem.url)
decoded_url = decode_internal_link_url(elem.url)
url,guessed_url_with_num = _url_hash_guess(decoded_url)
doc.runtime_dict['internal_link_record'].append(InternalLink(elem,url=url,guessed_url=guessed_url_with_num))
elif isinstance(elem, pf.RawInline) and elem.format == 'html' and (old_href:=get_html_href(elem.text)) and old_href.startswith('#'):
Expand Down
16 changes: 3 additions & 13 deletions src/pandoc_filter/filters/md2md/norm_internal_link.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import typeguard
import urllib.parse
import panflute as pf

from ...utils import TracingLogger
from ...utils import get_html_href,sub_html_href
from ...utils import get_html_href,sub_html_href,decode_internal_link_url

r"""A pandoc filter mainly for converting `markdown` to `markdown`.
Normalize internal links' URLs. Decode the URL if it is URL-encoded.
Expand All @@ -24,15 +23,6 @@
If in html, it is a raw-HTML element with href attribute that starts with `#`. Such as:
`<a href="#aaa">bbb</a>`
"""
@typeguard.typechecked
def _decode_internal_link_url(url:str)->str:
    r"""Undo the URL-encoding applied to a markdown internal link's URL.

    When pandoc converts markdown, internal links' URLs may already be
    URL-encoded before any filter sees them, and this cannot be turned off.

    Steps:
        1. Drop every leading `#` from `url` and percent-decode the rest.
        2. Render the decoded text as a level-one markdown heading to `gfm`
           through `pf.convert_text`.
        3. Strip the leading `#` and space characters from the rendered
           heading and put a single `#` back in front.

    Args:
        url: The internal link URL (expected to start with `#`).

    Returns:
        The decoded URL, prefixed with a single `#`.
    """
    fragment = urllib.parse.unquote(url.lstrip('#'))
    rendered_heading = pf.convert_text(
        f"# {fragment}",
        input_format='markdown',
        output_format='gfm',
        standalone=True,
    )
    # `lstrip('# ')` removes any run of leading '#' and ' ' characters.
    anchor_text = rendered_heading.lstrip('# ')
    return f"#{anchor_text}"

def _norm_internal_link(elem:pf.Element,doc:pf.Doc,**kwargs)->None:
r"""Follow the general procedure of [Panflute](http://scorreia.com/software/panflute/)
Expand All @@ -44,11 +34,11 @@ def _norm_internal_link(elem:pf.Element,doc:pf.Doc,**kwargs)->None:

if isinstance(elem, pf.Link) and elem.url.startswith('#'):
tracing_logger.mark(elem)
elem.url = _decode_internal_link_url(elem.url)
elem.url = decode_internal_link_url(elem.url)
tracing_logger.check_and_log('anchor_links',elem)
elif isinstance(elem, pf.RawInline) and elem.format == 'html' and (old_href:=get_html_href(elem.text)) and old_href.startswith('#'):
tracing_logger.mark(elem)
elem.text = sub_html_href(elem.text,_decode_internal_link_url(old_href))
elem.text = sub_html_href(elem.text,decode_internal_link_url(old_href))
tracing_logger.check_and_log('raw_anchor_links',elem)

def norm_internal_link_filter(doc:pf.Doc=None,**kwargs):
Expand Down
8 changes: 4 additions & 4 deletions src/pandoc_filter/filters/md2md/upload_figure_to_aliyun.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import typeguard
import panflute as pf

from ...utils import TracingLogger,OssHelper,DocRuntimeDict
from ...utils import get_html_src,sub_html_src
from ...utils import TracingLogger,OssHelper
from ...utils import get_html_src,sub_html_src,decode_src_url

r"""A pandoc filter mainly for converting `markdown` to `markdown`.
Auto upload local pictures to Aliyun OSS. Replace the original `src` with the new one.
Expand All @@ -32,13 +32,13 @@ def _upload_figure_to_aliyun(elem:pf.Element,doc:pf.Doc,**kwargs)->None:
typeguard.check_type(kwargs['doc_path'],pathlib.Path)
doc_path: pathlib.Path = kwargs['doc_path']
if isinstance(elem, pf.Image) and (old_src:=str(elem.url)).startswith('.'): # relative path
old_src = urllib.parse.unquote(old_src)
old_src = decode_src_url(old_src)
new_src = oss_helper.maybe_upload_file_and_get_src(doc_path.parent/old_src)
tracing_logger.mark(elem)
elem.url = new_src
tracing_logger.check_and_log('image',elem)
elif isinstance(elem, pf.RawInline) and elem.format == 'html' and (old_src:=get_html_src(elem.text)) and old_src.startswith('.'): # relative path
old_src = urllib.parse.unquote(old_src)
old_src = decode_src_url(old_src)
new_src = oss_helper.maybe_upload_file_and_get_src(doc_path.parent/old_src)
tracing_logger.mark(elem)
elem.text = sub_html_src(elem.text,new_src)
Expand Down
21 changes: 20 additions & 1 deletion src/pandoc_filter/utils/panflute_helper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Any,TypedDict
import urllib.parse
import typeguard
import pathlib
from collections import UserDict
Expand Down Expand Up @@ -29,4 +30,22 @@ class DocRuntimeDict(TypedDict):
equations_count:int|None
math:bool|None
doc_path:pathlib.Path|None
oss_helper:OssHelper|None
oss_helper:OssHelper|None

@typeguard.typechecked
def decode_internal_link_url(url:str)->str:
    r"""Undo the URL-encoding applied to an internal link's URL.

    When pandoc converts markdown to any target, internal links' URLs may
    already be URL-encoded before any filter sees them, and this cannot be
    turned off.

    Steps:
        1. Drop every leading `#` from `url` and percent-decode the rest.
        2. Render the decoded text as a level-one markdown heading to `gfm`
           through `pf.convert_text`.
        3. Strip the leading `#` and space characters from the rendered
           heading and put a single `#` back in front.

    Args:
        url: The internal link URL (expected to start with `#`).

    Returns:
        The decoded URL, prefixed with a single `#`.
    """
    fragment = urllib.parse.unquote(url.lstrip('#'))
    rendered_heading = pf.convert_text(
        f"# {fragment}",
        input_format='markdown',
        output_format='gfm',
        standalone=True,
    )
    # `lstrip('# ')` removes any run of leading '#' and ' ' characters.
    anchor_text = rendered_heading.lstrip('# ')
    return f"#{anchor_text}"

@typeguard.typechecked
def decode_src_url(url:str)->str:
    r"""Percent-decode a `src` URL.

    When pandoc converts markdown to any target, some elements' `src` URLs
    may already be URL-encoded before any filter sees them, and this cannot
    be turned off. This helper reverses that encoding.

    Args:
        url: The possibly URL-encoded `src` value.

    Returns:
        `url` with percent-escapes decoded by `urllib.parse.unquote`.
    """
    decoded_src = urllib.parse.unquote(url)
    return decoded_src
2 changes: 1 addition & 1 deletion src/pandoc_filter/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
from .utils import check_pandoc_version

check_pandoc_version(required_version='3.1.0')
__version__ = '0.2.4'
__version__ = '0.2.5'

0 comments on commit c404684

Please sign in to comment.