release v0.2.5

Zhaopudark · Jan 25, 2024 · c404684 · c404684
1 parent ef61c56
commit c404684
Show file tree

Hide file tree

Showing 6 changed files with 34 additions and 22 deletions.
diff --git a/RELEASE.md b/RELEASE.md
@@ -1,3 +1,7 @@
+# Pandoc-Filter 0.2.5
+## Release 0.2.5
+Reorganize the internal implementation of URL decoding.
+
 # Pandoc-Filter 0.2.4
 ## Release 0.2.4
 Fix a bug in `upload_figure_to_aliyun_filter` when local file path contains spaces.

diff --git a/src/pandoc_filter/filters/md2html/hash_anchor_and_internal_link.py b/src/pandoc_filter/filters/md2html/hash_anchor_and_internal_link.py
@@ -5,8 +5,7 @@
 import panflute as pf
 
 from ...utils import TracingLogger,DocRuntimeDict,InternalLink
-from ...utils import get_html_id,sub_html_id,get_html_href,get_text_hash
-from ..md2md.norm_internal_link import _decode_internal_link_url
+from ...utils import get_html_id,sub_html_id,get_html_href,get_text_hash,decode_internal_link_url
 
 
 r"""A pandoc filter that mainly for converting `markdown` to `html`.
@@ -109,7 +108,7 @@ def _url_hash_guess(text:str)->str:
 
     if isinstance(elem, pf.Link) and elem.url.startswith('#'):
         # Only md internal links need to be decoded, since pandoc encodes them before any filter runs.
-        decoded_url = _decode_internal_link_url(elem.url) 
+        decoded_url = decode_internal_link_url(elem.url) 
         url,guessed_url_with_num = _url_hash_guess(decoded_url)
         doc.runtime_dict['internal_link_record'].append(InternalLink(elem,url=url,guessed_url=guessed_url_with_num))
     elif isinstance(elem, pf.RawInline) and elem.format == 'html' and (old_href:=get_html_href(elem.text)) and old_href.startswith('#'):

diff --git a/src/pandoc_filter/filters/md2md/norm_internal_link.py b/src/pandoc_filter/filters/md2md/norm_internal_link.py
@@ -1,9 +1,8 @@
 import typeguard
-import urllib.parse
 import panflute as pf
 
 from ...utils import TracingLogger
-from ...utils import get_html_href,sub_html_href
+from ...utils import get_html_href,sub_html_href,decode_internal_link_url
 
 r"""A pandoc filter that mainly for converting `markdown` to `markdown`.
 Normalize internal links' URLs. Decode the URL if it is URL-encoded.
@@ -24,15 +23,6 @@
     If in html, it is a raw-HTML element with href attribute that starts with `#`. Such as:
         `<a href="#aaa">bbb</a>`
 """
-@typeguard.typechecked
-def _decode_internal_link_url(url:str)->str:
-    r"""When converting markdown to any type via pandoc, md internal links' URLs may be automatically URL-encoded before any filter works.
-    The encoding is done by default and may not be avoided.
-    This function is used to decode the URL.
-    """
-    decoded_url = urllib.parse.unquote(url.lstrip('#'))
-    header_mimic = pf.convert_text(f"# {decoded_url}",input_format='markdown',output_format='gfm',standalone=True)
-    return f"#{header_mimic.lstrip('# ')}"
 
 def _norm_internal_link(elem:pf.Element,doc:pf.Doc,**kwargs)->None:
     r"""Follow the general procedure of [Panflute](http://scorreia.com/software/panflute/)
@@ -44,11 +34,11 @@ def _norm_internal_link(elem:pf.Element,doc:pf.Doc,**kwargs)->None:
 
     if isinstance(elem, pf.Link) and elem.url.startswith('#'):
         tracing_logger.mark(elem)       
-        elem.url = _decode_internal_link_url(elem.url)
+        elem.url = decode_internal_link_url(elem.url)
         tracing_logger.check_and_log('anchor_links',elem)
     elif isinstance(elem, pf.RawInline) and elem.format == 'html' and (old_href:=get_html_href(elem.text)) and old_href.startswith('#'):
         tracing_logger.mark(elem)
-        elem.text = sub_html_href(elem.text,_decode_internal_link_url(old_href))
+        elem.text = sub_html_href(elem.text,decode_internal_link_url(old_href))
         tracing_logger.check_and_log('raw_anchor_links',elem)
 
 def norm_internal_link_filter(doc:pf.Doc=None,**kwargs):

diff --git a/src/pandoc_filter/filters/md2md/upload_figure_to_aliyun.py b/src/pandoc_filter/filters/md2md/upload_figure_to_aliyun.py
@@ -5,8 +5,8 @@
 import typeguard
 import panflute as pf
 
-from ...utils import TracingLogger,OssHelper,DocRuntimeDict
-from ...utils import get_html_src,sub_html_src
+from ...utils import TracingLogger,OssHelper
+from ...utils import get_html_src,sub_html_src,decode_src_url
 
 r"""A pandoc filter that mainly for converting `markdown` to `markdown`.
 Auto upload local pictures to Aliyun OSS. Replace the original `src` with the new one.
@@ -32,13 +32,13 @@ def _upload_figure_to_aliyun(elem:pf.Element,doc:pf.Doc,**kwargs)->None:
     typeguard.check_type(kwargs['doc_path'],pathlib.Path)
     doc_path: pathlib.Path = kwargs['doc_path']
     if isinstance(elem, pf.Image) and (old_src:=str(elem.url)).startswith('.'): # relative path
-        old_src = urllib.parse.unquote(old_src)
+        old_src = decode_src_url(old_src)
         new_src = oss_helper.maybe_upload_file_and_get_src(doc_path.parent/old_src)
         tracing_logger.mark(elem)
         elem.url = new_src
         tracing_logger.check_and_log('image',elem)
     elif isinstance(elem, pf.RawInline) and elem.format == 'html' and (old_src:=get_html_src(elem.text)) and old_src.startswith('.'): # relative path
-        old_src = urllib.parse.unquote(old_src)
+        old_src = decode_src_url(old_src)
         new_src = oss_helper.maybe_upload_file_and_get_src(doc_path.parent/old_src)
         tracing_logger.mark(elem)
         elem.text = sub_html_src(elem.text,new_src)

diff --git a/src/pandoc_filter/utils/panflute_helper.py b/src/pandoc_filter/utils/panflute_helper.py
@@ -1,4 +1,5 @@
 from typing import Any,TypedDict
+import urllib.parse
 import typeguard
 import pathlib
 from collections import UserDict
@@ -29,4 +30,22 @@ class DocRuntimeDict(TypedDict):
     equations_count:int|None
     math:bool|None
     doc_path:pathlib.Path|None
-    oss_helper:OssHelper|None
+    oss_helper:OssHelper|None
+
+@typeguard.typechecked
+def decode_internal_link_url(url:str)->str:
+    r"""When converting markdown to any type via pandoc, internal links' URLs may be automatically URL-encoded before any filter works.
+    The encoding is done by default and may not be avoided.
+    This function is used to decode the URL.
+    """
+    decoded_url = urllib.parse.unquote(url.lstrip('#'))
+    header_mimic = pf.convert_text(f"# {decoded_url}",input_format='markdown',output_format='gfm',standalone=True)
+    return f"#{header_mimic.lstrip('# ')}"
+
+@typeguard.typechecked
+def decode_src_url(url:str)->str:
+    r"""When converting markdown to any type via pandoc, some elements' `src` URLs may be automatically URL-encoded before any filter works.
+    The encoding is done by default and may not be avoided.
+    This function is used to decode the URL.
+    """
+    return urllib.parse.unquote(url)
diff --git a/src/pandoc_filter/version.py b/src/pandoc_filter/version.py
@@ -4,4 +4,4 @@
 from .utils import check_pandoc_version
 
 check_pandoc_version(required_version='3.1.0')
-__version__ = '0.2.4'
+__version__ = '0.2.5'