From 39bb46f497800c3231b0011996bbc8b6f90f0ee5 Mon Sep 17 00:00:00 2001 From: Hollow Man Date: Sun, 11 Jun 2023 21:41:21 +0300 Subject: [PATCH] mdbook-pdf v0.1.7, mdbook-pdf-outline v0.1.4: Fix several bugs for table of content generation Signed-off-by: Hollow Man --- Cargo.toml | 2 +- mdbook_pdf_outline/mdbook_pdf_outline.py | 148 ++++++++++++++++------- setup.py | 66 ++++++---- src/main.rs | 42 ++++++- 4 files changed, 180 insertions(+), 78 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b094093..4d31848 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ license = "GPL-3.0" name = "mdbook-pdf" readme = "README.md" repository = "https://github.com/HollowMan6/mdbook-pdf" -version = "0.1.6" +version = "0.1.7" include = [ "**/*.rs", "Cargo.toml", diff --git a/mdbook_pdf_outline/mdbook_pdf_outline.py b/mdbook_pdf_outline/mdbook_pdf_outline.py index cb4fc1c..c7d16a3 100755 --- a/mdbook_pdf_outline/mdbook_pdf_outline.py +++ b/mdbook_pdf_outline/mdbook_pdf_outline.py @@ -1,16 +1,25 @@ #!/usr/bin/env python3 # vim: set fileencoding=utf-8 : # vim: set et ts=4 sw=4: -''' +""" mdbook-pdf-outline + An outline (Table of Content) generator for mdBook-pdf. + Author: Hollow Man + License: GPL-3.0 - Copyright © 2022 Hollow Man(@HollowMan6). All rights reserved. + Copyright © 2022-2023 Hollow Man (@HollowMan6). All rights reserved. This document is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. -''' + + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along with this program. + If not, see <https://www.gnu.org/licenses/>. 
+""" from pypdf import PdfReader, PdfWriter @@ -19,15 +28,47 @@ import lxml.html import re import json +import sys + + +buffer = [] -def update_parent_dict(parent_dict, level, node): +def get_parent(level, parent_dict): + if level > 1: + temp = parent_dict + for _ in range(1, level - 1): + if temp["child"] and temp["child"]["node"]: + temp = temp["child"] + return temp["node"] + return None + + +def update_parent_dict( + parent_dict, level, writer, text, page, fit=None, handle_buffer=False +): temp = parent_dict for _ in range(1, level): if not temp["child"]: temp["child"] = {"node": None, "child": {}} temp = temp["child"] - temp["node"] = node + + if not handle_buffer: + if page is None: + buffer.append((level, text)) + return + else: + # Flush buffer so that ToC items without page destinations are + # added to the outline with the next page destination + for item in buffer: + update_parent_dict( + parent_dict, item[0], writer, item[1], page, fit, True + ) + buffer.clear() + + temp["node"] = writer.add_outline_item( + text, page, get_parent(level, parent_dict), fit=fit + ) temp["child"] = {} @@ -46,24 +87,20 @@ def add_wkhtmltopdf_like_outline(html_page, reader, writer): if not results.tag[1:].isdigit(): continue level = int(results.tag[1:]) - dest = reader.named_destinations["/{}".format( - urllib.parse.quote(id))] - parent = None - if level > 1: - temp = parent_dict - for _ in range(1, level - 1): - if temp["child"] and temp["child"]["node"]: - temp = temp["child"] - parent = temp["node"] - - if dest.get('/Type') == '/Fit': - update_parent_dict(parent_dict, level, writer.add_outline_item( - results.text_content(), None, parent)) - continue - update_parent_dict(parent_dict, level, writer.add_outline_item( - results.text_content(), reader.get_destination_page_number( - dest), parent, fit=Fit( - dest.get('/Type'), (dest.get('/Left'), dest.get('/Top'), dest.get('/Zoom'))))) + dest = reader.named_destinations["/{}".format(urllib.parse.quote(id))] + + page = None + 
fit = None + if dest.get("/Type") != "/Fit": + page = reader.get_destination_page_number(dest) + fit = Fit( + dest.get("/Type"), + (dest.get("/Left"), dest.get("/Top"), dest.get("/Zoom")), + ) + + update_parent_dict( + parent_dict, level, writer, results.text_content(), page, fit + ) def parse_toc(toc, reader, writer, parent_dict, level=1): @@ -75,7 +112,11 @@ def parse_toc(toc, reader, writer, parent_dict, level=1): dest_name = "" target_element = None for element in head.iter(): - if element.tag == "a" or element.tag == "div": + if ( + element.tag == "a" + or element.tag == "div" + or element.find_class("part-title") + ): target_element = element break to_remove = head.find_class("toggle") @@ -84,39 +125,34 @@ def parse_toc(toc, reader, writer, parent_dict, level=1): if target_element is None: continue dest = None - parent = None - if "href" in element.attrib: + if "href" in target_element.attrib: for content in target_element.attrib["href"].split("#"): - dest_name += content.rstrip(".html").replace("/", - "-") + "-" + dest_name += content.removesuffix(".html").replace("/", "-") + "-" dest_name = dest_name.rstrip("-") dest_name = "/{}".format(urllib.parse.quote(dest_name.lower())) - dest_name = dest_name.replace(".", "") + if dest_name in reader.named_destinations: dest = reader.named_destinations[dest_name] else: + print("Warning: Destination not found: {}".format(dest_name)) for d in reader.named_destinations.items(): if d[0].startswith(dest_name): dest = d[1] break - if not dest: - continue - if level > 1: - temp = parent_dict - for _ in range(1, level - 1): - if temp["child"] and temp["child"]["node"]: - temp = temp["child"] - parent = temp["node"] - - if dest.get('/Type') == '/Fit': - update_parent_dict(parent_dict, level, writer.add_outline_item( - head.text_content(), None, parent)) - continue + page = None + fit = None if dest: - page = reader.get_destination_page_number(dest) - update_parent_dict(parent_dict, level, writer.add_outline_item( - 
head.text_content(), page, parent)) + if dest.get("/Type") != "/Fit": + page = reader.get_destination_page_number(dest) + fit = Fit( + dest.get("/Type"), + (dest.get("/Left"), dest.get("/Top"), dest.get("/Zoom")), + ) + + update_parent_dict( + parent_dict, level, writer, head.text_content(), page, fit + ) def add_toc_outline(html_page, reader, writer): @@ -132,7 +168,10 @@ def add_toc_outline(html_page, reader, writer): def main(): - conf = json.loads(input())["config"]["output"]["pdf-outline"] + sys.stdin.reconfigure(encoding="utf8") + context = json.loads(sys.stdin.read()) + + conf = context["config"]["output"]["pdf-outline"] reader = PdfReader("../pdf/output.pdf") @@ -144,6 +183,23 @@ def main(): else: add_toc_outline("../html/print.html", reader, writer) + meta = context["config"]["book"] + try: + writer.add_metadata( + { + "/DisplayDocTitle": True, + "/Title": meta.get("title") or "", + "/Author": ", ".join(meta["authors"]), + "/Subject": meta.get("description") or "", + "/CreationDate": reader.metadata["/CreationDate"], + "/ModDate": reader.metadata["/ModDate"], + "/Creator": "mdBook-pdf", + "/Lang": meta.get("language") or "", + } + ) + except Exception: + pass + with open("output.pdf", "wb") as f: writer.write(f) diff --git a/setup.py b/setup.py index f4194d4..d68e25f 100755 --- a/setup.py +++ b/setup.py @@ -1,44 +1,58 @@ #!/usr/bin/env python3 # vim: set fileencoding=utf-8 : # vim: set et ts=4 sw=4: -''' +""" mdbook-pdf-outline + An outline (Table of Content) generator for mdBook-pdf. + Author: Hollow Man + License: GPL-3.0 - Copyright © 2022 Hollow Man(@HollowMan6). All rights reserved. + Copyright © 2022-2023 Hollow Man (@HollowMan6). All rights reserved. This document is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. 
-''' + + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along with this program. + If not, see <https://www.gnu.org/licenses/>. +""" from setuptools import setup # read the contents of README file from os import path + this_directory = path.abspath(path.dirname(__file__)) -with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f: +with open(path.join(this_directory, "README.md"), encoding="utf-8") as f: long_description = f.read() -setup(name='mdbook-pdf-outline', - version='0.1.3', - description='Tool for generating outlines for PDF files generated by mdbook-pdf.', - url='https://github.com/HollowMan6/mdbook-pdf', - author='Hollow Man (Domain Address)', - author_email='hollowman@opensuse.org', - license='GPL-3.0-or-later', - install_requires=['lxml', 'pypdf'], - packages=['mdbook_pdf_outline'], - entry_points={'console_scripts': [ - 'mdbook-pdf-outline=mdbook_pdf_outline.mdbook_pdf_outline:main']}, - long_description=long_description, - project_urls={ - "Bug Tracker": "https://github.com/HollowMan6/mdbook-pdf/issues", - }, - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", - "Topic :: Text Processing :: Markup :: Markdown", - ], - long_description_content_type='text/markdown' - ) +setup( + name="mdbook-pdf-outline", + version="0.1.4", + description="Tool for generating outlines for PDF files generated by mdbook-pdf.", + url="https://github.com/HollowMan6/mdbook-pdf", + author="Hollow Man (Domain Address)", + author_email="hollowman@opensuse.org", + license="GPL-3.0-or-later", + install_requires=["lxml", "pypdf"], + packages=["mdbook_pdf_outline"], + entry_points={ + "console_scripts": [ + 
"mdbook-pdf-outline=mdbook_pdf_outline.mdbook_pdf_outline:main" + ] + }, + long_description=long_description, + project_urls={ + "Bug Tracker": "https://github.com/HollowMan6/mdbook-pdf/issues", + }, + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", + "Topic :: Text Processing :: Markup :: Markdown", + ], + long_description_content_type="text/markdown", +) diff --git a/src/main.rs b/src/main.rs index befaa3c..6104c16 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,11 @@ /** * mdbook-pdf - * Copyright (C) 2022-2023 Hollow Man + * A PDF generator for mdBook using headless Chrome. + * + * Author: Hollow Man + * License: GPL-3.0 + * + * Copyright (C) 2022-2023 Hollow Man (@HollowMan6) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -59,9 +64,31 @@ fn main() -> Result<(), Box> { let mut buf_reader = BufReader::new(file); let mut contents = String::new(); buf_reader.read_to_string(&mut contents)?; - contents = contents.replacen( - "", - " + + // Insert a link to the page div in the print.html to make sure that generated pdf + // contains the destination for ToC to locate the specific page in pdf. + let mut toc_fix = "
".to_owned(); + for item in ctx.book.iter() { + if let mdbook::book::BookItem::Chapter(chapter) = item { + let path = chapter.path.clone(); + if let Some(path) = path { + let print_page_id = { + let mut base = path.display().to_string(); + if base.ends_with(".md") { + base.truncate(base.len() - 3); + } + &base + .replace("/", "-") + .replace("\\", "-") + .to_ascii_lowercase() + }; + toc_fix.push_str(&(format!(r##"{print_page_id}"##))); + } + } + } + toc_fix.push_str("
"); + + let script = " ", + + ".to_owned(); + + contents = contents.replacen( + "", + &(script + &toc_fix), 1, ); if !cfg.static_site_url.is_empty() {