Skip to content

Commit

Permalink
mdbook-pdf v0.1.7: Fix several bugs for table of content generation
Browse files Browse the repository at this point in the history
Signed-off-by: Hollow Man <[email protected]>
  • Loading branch information
HollowMan6 committed Jun 11, 2023
1 parent 9574a5a commit 9199582
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 78 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ license = "GPL-3.0"
name = "mdbook-pdf"
readme = "README.md"
repository = "https://github.com/HollowMan6/mdbook-pdf"
version = "0.1.6"
version = "0.1.7"
include = [
"**/*.rs",
"Cargo.toml",
Expand Down
148 changes: 102 additions & 46 deletions mdbook_pdf_outline/mdbook_pdf_outline.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,25 @@
#!/usr/bin/env python3
# vim: set fileencoding=utf-8 :
# vim: set et ts=4 sw=4:
'''
"""
mdbook-pdf-outline
An outline (Table of Contents) generator for mdBook-pdf.
Author: Hollow Man <[email protected]>
License: GPL-3.0
Copyright © 2022 Hollow Man(@HollowMan6). All rights reserved.
Copyright © 2022-2023 Hollow Man (@HollowMan6). All rights reserved.
This document is free software; you can redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option)
any later version.
'''
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program.
If not, see <http://www.gnu.org/licenses/>.
"""


from pypdf import PdfReader, PdfWriter
Expand All @@ -19,15 +28,47 @@
import lxml.html
import re
import json
import sys


buffer = []


def update_parent_dict(parent_dict, level, node):
def get_parent(level, parent_dict):
    """Return the outline node that should parent a new item at ``level``.

    ``parent_dict`` is a chain of nested dicts of the form
    ``{"node": <outline item or None>, "child": {...}}``; the dict reached
    after ``d - 1`` hops through ``"child"`` holds the node for heading
    level ``d``.

    Args:
        level: Heading level of the item about to be added (1 = top level).
        parent_dict: Root of the nested ``node``/``child`` chain.

    Returns:
        ``None`` for a top-level item (``level <= 1``). Otherwise the node
        stored at the deepest populated level above ``level``; this falls
        back to a shallower ancestor when intermediate heading levels are
        missing, and may itself be ``None`` if the root has no node yet.
    """
    if level <= 1:
        return None

    current = parent_dict
    for _ in range(1, level - 1):
        child = current.get("child")
        # Stop at the first gap: once a level has no node, nothing deeper
        # can be reached, so further iterations would be no-ops.
        if not child or not child.get("node"):
            break
        current = child
    return current["node"]


def update_parent_dict(
    parent_dict, level, writer, text, page, fit=None, handle_buffer=False
):
    """Add an outline item to ``writer`` and record it in ``parent_dict``.

    Items that have no page destination (``page is None``) are not written
    immediately: they are queued in the module-level ``buffer`` and emitted
    later, using the page/fit of the next item that does have a destination.

    Args:
        parent_dict: Root of the nested ``{"node": ..., "child": {...}}``
            chain that tracks the most recent outline node per heading level.
        level: Heading level of the item (1 = top level).
        writer: Object exposing ``add_outline_item(title, page, parent, fit=...)``.
        text: Title of the outline item.
        page: Destination page number, or ``None`` if not known.
        fit: Optional fit/zoom specification forwarded to the writer.
        handle_buffer: ``True`` only for the recursive calls that flush
            buffered items; it bypasses the buffering logic.
    """
    if not handle_buffer:
        if page is None:
            buffer.append((level, text))
            return
        # Flush buffer so that ToC items without page destinations are
        # added to the outline with the next page destination
        for buffered_level, buffered_text in buffer:
            update_parent_dict(
                parent_dict, buffered_level, writer, buffered_text, page, fit, True
            )
        buffer.clear()

    # Locate (creating placeholders as needed) the slot for this level.
    # This must happen after the flush above: inserting a buffered item
    # resets its "child" entry, so a slot looked up earlier could already
    # be detached from ``parent_dict``.
    slot = parent_dict
    for _ in range(1, level):
        if not slot["child"]:
            slot["child"] = {"node": None, "child": {}}
        slot = slot["child"]

    slot["node"] = writer.add_outline_item(
        text, page, get_parent(level, parent_dict), fit=fit
    )
    # A new item at this level invalidates every deeper level beneath it.
    slot["child"] = {}


Expand All @@ -46,24 +87,20 @@ def add_wkhtmltopdf_like_outline(html_page, reader, writer):
if not results.tag[1:].isdigit():
continue
level = int(results.tag[1:])
dest = reader.named_destinations["/{}".format(
urllib.parse.quote(id))]
parent = None
if level > 1:
temp = parent_dict
for _ in range(1, level - 1):
if temp["child"] and temp["child"]["node"]:
temp = temp["child"]
parent = temp["node"]

if dest.get('/Type') == '/Fit':
update_parent_dict(parent_dict, level, writer.add_outline_item(
results.text_content(), None, parent))
continue
update_parent_dict(parent_dict, level, writer.add_outline_item(
results.text_content(), reader.get_destination_page_number(
dest), parent, fit=Fit(
dest.get('/Type'), (dest.get('/Left'), dest.get('/Top'), dest.get('/Zoom')))))
dest = reader.named_destinations["/{}".format(urllib.parse.quote(id))]

page = None
fit = None
if dest.get("/Type") != "/Fit":
page = reader.get_destination_page_number(dest)
fit = Fit(
dest.get("/Type"),
(dest.get("/Left"), dest.get("/Top"), dest.get("/Zoom")),
)

update_parent_dict(
parent_dict, level, writer, results.text_content(), page, fit
)


def parse_toc(toc, reader, writer, parent_dict, level=1):
Expand All @@ -75,7 +112,11 @@ def parse_toc(toc, reader, writer, parent_dict, level=1):
dest_name = ""
target_element = None
for element in head.iter():
if element.tag == "a" or element.tag == "div":
if (
element.tag == "a"
or element.tag == "div"
or element.find_class("part-title")
):
target_element = element
break
to_remove = head.find_class("toggle")
Expand All @@ -84,39 +125,34 @@ def parse_toc(toc, reader, writer, parent_dict, level=1):
if target_element is None:
continue
dest = None
parent = None
if "href" in element.attrib:
if "href" in target_element.attrib:
for content in target_element.attrib["href"].split("#"):
dest_name += content.rstrip(".html").replace("/",
"-") + "-"
dest_name += content.removesuffix(".html").replace("/", "-") + "-"
dest_name = dest_name.rstrip("-")
dest_name = "/{}".format(urllib.parse.quote(dest_name.lower()))
dest_name = dest_name.replace(".", "")

if dest_name in reader.named_destinations:
dest = reader.named_destinations[dest_name]
else:
print("Dest not found: {}".format(dest_name))
for d in reader.named_destinations.items():
if d[0].startswith(dest_name):
dest = d[1]
break
if not dest:
continue
if level > 1:
temp = parent_dict
for _ in range(1, level - 1):
if temp["child"] and temp["child"]["node"]:
temp = temp["child"]
parent = temp["node"]

if dest.get('/Type') == '/Fit':
update_parent_dict(parent_dict, level, writer.add_outline_item(
head.text_content(), None, parent))
continue

page = None
fit = None
if dest:
page = reader.get_destination_page_number(dest)
update_parent_dict(parent_dict, level, writer.add_outline_item(
head.text_content(), page, parent))
if dest.get("/Type") != "/Fit":
page = reader.get_destination_page_number(dest)
fit = Fit(
dest.get("/Type"),
(dest.get("/Left"), dest.get("/Top"), dest.get("/Zoom")),
)

update_parent_dict(
parent_dict, level, writer, head.text_content(), page, fit
)


def add_toc_outline(html_page, reader, writer):
Expand All @@ -132,7 +168,10 @@ def add_toc_outline(html_page, reader, writer):


def main():
conf = json.loads(input())["config"]["output"]["pdf-outline"]
sys.stdin.reconfigure(encoding="utf8")
context = json.loads(sys.stdin.read())

conf = context["config"]["output"]["pdf-outline"]

reader = PdfReader("../pdf/output.pdf")

Expand All @@ -144,6 +183,23 @@ def main():
else:
add_toc_outline("../html/print.html", reader, writer)

meta = context["config"]["book"]
try:
writer.add_metadata(
{
"/DisplayDocTitle": True,
"/Title": meta.get("title") or "",
"/Author": ", ".join(meta["authors"]),
"/Subject": meta.get("description") or "",
"/CreationDate": reader.metadata["/CreationDate"],
"/ModDate": reader.metadata["/ModDate"],
"/Creator": "mdBook-pdf",
"/Lang": meta.get("language") or "",
}
)
except Exception:
pass

with open("output.pdf", "wb") as f:
writer.write(f)

Expand Down
66 changes: 40 additions & 26 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,58 @@
#!/usr/bin/env python3
# vim: set fileencoding=utf-8 :
# vim: set et ts=4 sw=4:
'''
"""
mdbook-pdf-outline
An outline (Table of Contents) generator for mdBook-pdf.
Author: Hollow Man <[email protected]>
License: GPL-3.0
Copyright © 2022 Hollow Man(@HollowMan6). All rights reserved.
Copyright © 2022-2023 Hollow Man (@HollowMan6). All rights reserved.
This document is free software; you can redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option)
any later version.
'''
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program.
If not, see <http://www.gnu.org/licenses/>.
"""

from setuptools import setup

# read the contents of README file
from os import path

this_directory = path.abspath(path.dirname(__file__))
with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f:
with open(path.join(this_directory, "README.md"), encoding="utf-8") as f:
long_description = f.read()

setup(name='mdbook-pdf-outline',
version='0.1.3',
description='Tool for generating outlines for PDF files generated by mdbook-pdf.',
url='https://github.com/HollowMan6/mdbook-pdf',
author='Hollow Man (Domain Address)',
author_email='[email protected]',
license='GPL-3.0-or-later',
install_requires=['lxml', 'pypdf'],
packages=['mdbook_pdf_outline'],
entry_points={'console_scripts': [
'mdbook-pdf-outline=mdbook_pdf_outline.mdbook_pdf_outline:main']},
long_description=long_description,
project_urls={
"Bug Tracker": "https://github.com/HollowMan6/mdbook-pdf/issues",
},
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
"Topic :: Text Processing :: Markup :: Markdown",
],
long_description_content_type='text/markdown'
)
setup(
name="mdbook-pdf-outline",
version="0.1.4",
description="Tool for generating outlines for PDF files generated by mdbook-pdf.",
url="https://github.com/HollowMan6/mdbook-pdf",
author="Hollow Man (Domain Address)",
author_email="[email protected]",
license="GPL-3.0-or-later",
install_requires=["lxml", "pypdf"],
packages=["mdbook_pdf_outline"],
entry_points={
"console_scripts": [
"mdbook-pdf-outline=mdbook_pdf_outline.mdbook_pdf_outline:main"
]
},
long_description=long_description,
project_urls={
"Bug Tracker": "https://github.com/HollowMan6/mdbook-pdf/issues",
},
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
"Topic :: Text Processing :: Markup :: Markdown",
],
long_description_content_type="text/markdown",
)
42 changes: 37 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
/**
* mdbook-pdf
* Copyright (C) 2022-2023 Hollow Man
* A PDF generator for mdBook using headless Chrome.
*
* Author: Hollow Man <[email protected]>
* License: GPL-3.0
*
* Copyright (C) 2022-2023 Hollow Man (@HollowMan6)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -59,9 +64,31 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut buf_reader = BufReader::new(file);
let mut contents = String::new();
buf_reader.read_to_string(&mut contents)?;
contents = contents.replacen(
"</script>",
"</script>

// Insert a link to the page div in the print.html to make sure that generated pdf
// contains the destination for ToC to locate the specific page in pdf.
let mut toc_fix = "<div style=\"display: none\">".to_owned();
for item in ctx.book.iter() {
if let mdbook::book::BookItem::Chapter(chapter) = item {
let path = chapter.path.clone();
if let Some(path) = path {
let print_page_id = {
let mut base = path.display().to_string();
if base.ends_with(".md") {
base.truncate(base.len() - 3);
}
&base
.replace("/", "-")
.replace("\\", "-")
.to_ascii_lowercase()
};
toc_fix.push_str(&(format!(r##"<a href="#{print_page_id}">{print_page_id}</a>"##)));
}
}
}
toc_fix.push_str("</div>");

let script = "</script>
<!-- Custom JS scripts for mdbook-pdf PDF generation -->
<script type='text/javascript'>
Expand All @@ -86,7 +113,12 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
markAllContentHasLoadedForPrinting();
}
});
</script>",
</script>
".to_owned();

contents = contents.replacen(
"</script>",
&(script + &toc_fix),
1,
);
if !cfg.static_site_url.is_empty() {
Expand Down

0 comments on commit 9199582

Please sign in to comment.