From ef8ceaf491f5576a3f0a030609708bed944de2d3 Mon Sep 17 00:00:00 2001
From: Lukas <50193931+lmuenter@users.noreply.github.com>
Date: Wed, 25 Sep 2024 20:53:44 +0200
Subject: [PATCH] Fix tag list (#5)

* refactor: move QuartoReader into readers.py and add a metadata parsing method

* fix failing tests (quote front matter values and add tags in the fixtures)

---------

Co-authored-by: lukas <lmuenter@gfbio.org>
---
 pelican/plugins/lm_pelican_quarto/quarto.py   | 87 +------------------
 pelican/plugins/lm_pelican_quarto/readers.py  | 77 ++++++++++++++++
 .../lm_pelican_quarto/tests/test_quarto.py    | 12 +--
 3 files changed, 86 insertions(+), 90 deletions(-)
 create mode 100644 pelican/plugins/lm_pelican_quarto/readers.py

diff --git a/pelican/plugins/lm_pelican_quarto/quarto.py b/pelican/plugins/lm_pelican_quarto/quarto.py
index 7eadbf0..f13a3a6 100644
--- a/pelican/plugins/lm_pelican_quarto/quarto.py
+++ b/pelican/plugins/lm_pelican_quarto/quarto.py
@@ -1,102 +1,19 @@
-from datetime import date, datetime
 import logging
 from pathlib import Path
-import re
 
 from bs4 import BeautifulSoup
-import markdown
-import pytz
-import yaml
 
-from pelican import readers, signals
-from pelican.contents import Author, Category
+from pelican import signals
 from pelican.generators import ArticlesGenerator
 
 from .adapters import Quarto
 from .parsers import QuartoHTML
+from .readers import QuartoReader
 
 logger = logging.getLogger(__name__)
 QUARTO_EXTENSION = "qmd"
 
 
-class QuartoReader(readers.BaseReader):
-    """Read QMD Files using a Pelican Reader."""
-
-    file_extensions = [QUARTO_EXTENSION]
-
-    def read(self, filename):
-        """Read QMD Files."""
-        with open(filename, encoding="utf-8") as file:
-            content = file.read()
-
-        # extract yaml header and content body
-        _, front_matter, markdown_body = re.split(r"^---\s*$", content, 2, re.MULTILINE)
-
-        metadata = yaml.load(front_matter, Loader=yaml.FullLoader)
-
-        # ensure correct datetime format for date
-        metadata["date"] = self.parse_date(metadata["date"])
-
-        if "category" in metadata:
-            metadata["category"] = Category(
-                metadata["category"], settings=self.settings
-            )
-        if "author" in metadata:
-            metadata["author"] = Author(metadata["author"], settings=self.settings)
-
-        article_content = markdown.markdown(markdown_body)
-        metadata["summary"] = self.generate_article_summary(
-            metadata.get("summary"), article_content
-        )
-        return article_content, metadata
-
-    def parse_date(self, date_input):
-        """Ensure date has timezone information."""
-        if isinstance(date_input, datetime):
-            return (
-                date_input if date_input.tzinfo else date_input.replace(tzinfo=pytz.UTC)
-            )
-        if isinstance(date_input, date):
-            return datetime(
-                year=date_input.year,
-                month=date_input.month,
-                day=date_input.day,
-                tzinfo=pytz.UTC,
-            )
-        if isinstance(date_input, str):
-            return datetime.strptime(date_input, "%Y-%m-%d").replace(tzinfo=pytz.UTC)
-        logger.error("Invalid date format or type")
-        return None
-
-    def generate_article_summary(self, existing_summary, content):
-        """Generate a summary if one does not exist."""
-        if existing_summary:
-            return existing_summary
-
-        # strip code blocks
-        content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL)
-        html_content = markdown.markdown(content_no_code)
-
-        soup = BeautifulSoup(html_content, "html.parser")
-
-        max_paragraphs = self.settings.get("SUMMARY_MAX_PARAGRAPHS", None)
-        if max_paragraphs is not None:
-            paragraphs = soup.find_all("p")[:max_paragraphs]
-            html_content = "".join(str(p) for p in paragraphs)
-            soup = BeautifulSoup(html_content, "html.parser")
-
-        max_length = self.settings.get("SUMMARY_MAX_LENGTH", None)
-        end_suffix = self.settings.get("SUMMARY_END_SUFFIX", "...")
-
-        text_content = soup.get_text()
-        if max_length is not None:
-            words = text_content.split()
-            if len(words) > max_length:
-                text_content = " ".join(words[:max_length]) + end_suffix
-
-        return text_content
-
-
 def setup_quarto_project(pelican_instance):
     """Set up the Quarto project if a .qmd file is found."""
     content_path = Path(pelican_instance.settings["PATH"])
diff --git a/pelican/plugins/lm_pelican_quarto/readers.py b/pelican/plugins/lm_pelican_quarto/readers.py
new file mode 100644
index 0000000..498f62d
--- /dev/null
+++ b/pelican/plugins/lm_pelican_quarto/readers.py
@@ -0,0 +1,77 @@
+import logging
+import re
+
+from bs4 import BeautifulSoup
+import markdown
+import yaml
+
+from pelican import readers
+
+logger = logging.getLogger(__name__)
+QUARTO_EXTENSION = "qmd"
+
+
+class QuartoReader(readers.BaseReader):
+    """Pelican reader that converts Quarto (``.qmd``) files to articles."""
+
+    enabled = True
+    file_extensions = [QUARTO_EXTENSION]
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._source_path = None
+
+    def read(self, filename):
+        """Return ``(html, metadata)`` for the QMD file at ``filename``.
+
+        Raises ``ValueError`` if the ``---`` front matter fences are missing.
+        """
+        with open(filename, encoding="utf-8") as file:
+            content = file.read()
+
+        # Keyword arguments: positional maxsplit/flags are deprecated (3.13).
+        parts = re.split(r"^---\s*$", content, maxsplit=2, flags=re.MULTILINE)
+        if len(parts) != 3:
+            raise ValueError(f"No YAML front matter found in {filename}")
+        _, front_matter, markdown_body = parts
+        # An empty header loads as None; treat it as "no metadata".
+        metadata = yaml.load(front_matter, Loader=yaml.FullLoader) or {}
+        metadata = self._parse_metadata(metadata)
+
+        article_content = markdown.markdown(markdown_body)
+        metadata["summary"] = self.generate_article_summary(
+            metadata.get("summary"), article_content
+        )
+        return article_content, metadata
+
+    def _parse_metadata(self, meta):
+        """Lower-case each key and run its value through ``process_metadata``."""
+        return {
+            name.lower(): self.process_metadata(name.lower(), value)
+            for name, value in meta.items()
+        }
+
+    def generate_article_summary(self, existing_summary, content):
+        """Return ``existing_summary`` if set, else plain text from ``content``.
+
+        ``SUMMARY_MAX_PARAGRAPHS`` caps the ``<p>`` elements that are kept;
+        ``SUMMARY_MAX_LENGTH`` caps the words, appending ``SUMMARY_END_SUFFIX``.
+        """
+        if existing_summary:
+            return existing_summary
+
+        # strip code blocks
+        content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL)
+        soup = BeautifulSoup(markdown.markdown(content_no_code), "html.parser")
+        max_paragraphs = self.settings.get("SUMMARY_MAX_PARAGRAPHS")
+        if max_paragraphs is not None:
+            paragraphs = soup.find_all("p")[:max_paragraphs]
+            soup = BeautifulSoup("".join(map(str, paragraphs)), "html.parser")
+
+        text_content = soup.get_text()
+        max_length = self.settings.get("SUMMARY_MAX_LENGTH")
+        words = text_content.split()
+        if max_length is not None and len(words) > max_length:
+            suffix = self.settings.get("SUMMARY_END_SUFFIX", "...")
+            return " ".join(words[:max_length]) + suffix
+        return text_content
diff --git a/pelican/plugins/lm_pelican_quarto/tests/test_quarto.py b/pelican/plugins/lm_pelican_quarto/tests/test_quarto.py
index a38098e..e6f5cd4 100644
--- a/pelican/plugins/lm_pelican_quarto/tests/test_quarto.py
+++ b/pelican/plugins/lm_pelican_quarto/tests/test_quarto.py
@@ -28,8 +28,9 @@ def create_article(temp_path):
     article_content = """
 ---
 title: testqmd
-date: 2024-06-02
-category: test
+date: "2024-06-02"
+category: "test"
+tags: ["arts", "b"]
 ---
 Hi
 
@@ -58,9 +59,10 @@ def create_nested_article(temp_path):
 
     article_content = """
 ---
-title: testqmd
-date: 2024-06-02
-category: test
+title: "testqmd"
+date: "2024-06-02"
+category: "test"
+tags: ["a", "b"]
 ---
 Hi