Skip to content

Commit

Permalink
Add --page-number-offset to change page numbers
Browse files Browse the repository at this point in the history
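This option adds a fixed offset (positive or negative) to the printed
page numbers. This is useful if, for example, the document includes a
cover page, so that the page numbers of the PDF do not match the page
numbers of the document. The offset applies only to numeric page numbers
("page #N"); page labels, when they are used, are printed unchanged.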
  • Loading branch information
gvtulder committed Feb 5, 2024
1 parent 2fdeacb commit 6aec786
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 5 deletions.
3 changes: 3 additions & 0 deletions pdfannots/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
MD_FORMAT_ARGS = frozenset([
'condense',
'group_highlights_by_color',
'page_number_offset',
'print_filename',
'sections',
'use_page_labels',
Expand Down Expand Up @@ -77,6 +78,8 @@ def parse_args() -> typ.Tuple[argparse.Namespace, LAParams]:
help="Emit annotations as a blockquote regardless of length.")
g.add_argument("--no-page-labels", dest="use_page_labels", default=True, action="store_false",
help="Ignore page labels if present, just print 1-based page numbers.")
g.add_argument("--page-number-offset", dest="page_number_offset", default=0, type=int,
help="Increase or decrease page numbers with a fixed offset.")
g.add_argument("--print-filename", dest="print_filename", default=False, action="store_true",
help="Print the name of each file with annotations.")
g.add_argument("-w", "--wrap", dest="wrap_column", metavar="COLS", type=int,
Expand Down
11 changes: 8 additions & 3 deletions pdfannots/printer/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,14 @@ def __init__(
self,
*,
condense: bool = True, # Permit use of the condensed format
page_number_offset: int = 0, # Page number offset
print_filename: bool = False, # Whether to print file names
remove_hyphens: bool = True, # Whether to remove hyphens across a line break
use_page_labels: bool = True, # Whether to use page labels
wrap_column: typ.Optional[int] = None, # Column at which output is word-wrapped
**kwargs: typ.Any # Other args, ignored
) -> None:
self.page_number_offset = page_number_offset
self.print_filename = print_filename
self.remove_hyphens = remove_hyphens
self.use_page_labels = use_page_labels
Expand Down Expand Up @@ -140,10 +142,13 @@ def print_file(
def format_pos(
pos: Pos,
document: Document,
use_page_label: bool
use_page_label: bool,
page_number_offset: int
) -> str:

result = pos.page.format_name(use_label=use_page_label).title()
result = pos.page.format_name(
use_label=use_page_label,
page_number_offset=page_number_offset).title()

o = document.nearest_outline(pos)
if o:
Expand Down Expand Up @@ -230,7 +235,7 @@ def format_annot(

# compute the formatted position (and extra bit if needed) as a label
assert annot.pos is not None
label = self.format_pos(annot.pos, document, self.use_page_labels) + \
label = self.format_pos(annot.pos, document, self.use_page_labels, self.page_number_offset) + \
(" " + extra if extra else "") + ":"

# If we have short (few words) text with a short or no comment, and the
Expand Down
4 changes: 2 additions & 2 deletions pdfannots/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,12 @@ def __repr__(self) -> str:
def __str__(self) -> str:
    """Return the page's display name, as produced by format_name() with its default arguments."""
    return self.format_name()

def format_name(self, use_label: bool = True) -> str:
def format_name(self, use_label: bool = True, page_number_offset: int = 0) -> str:
if self.label and use_label:
return 'page %s' % self.label
else:
# + 1 for 1-based page numbers in normal program output (error messages, etc.)
return 'page #%d' % (self.pageno + 1)
return 'page #%d' % (self.pageno + 1 + page_number_offset)

def __eq__(self, other: object) -> bool:
if not isinstance(other, Page):
Expand Down
12 changes: 12 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import operator
import pathlib
import re
import typing as typ
import unittest
from datetime import datetime, timedelta, timezone
Expand Down Expand Up @@ -280,6 +281,17 @@ def test_flat(self) -> None:
self.assertGreater(linecount, 5)
self.assertGreater(charcount, 500)

def test_flat_page_number_offset(self) -> None:
    """Check that page_number_offset shifts the page numbers in the flat Markdown output."""
    p = MarkdownPrinter(page_number_offset=-1)

    # Capture the whole (optionally negative) number: a single-digit class
    # would truncate "10" to "1" and could never match a negative page
    # number, which a negative offset is able to produce.
    page_re = re.compile(r'.+Page #(-?[0-9]+)')

    page_numbers = []
    for line in p.print_file('dummyfile', self.doc):
        m = page_re.match(line)
        if m:
            page_numbers.append(m[1])

    # With an offset of -1 the first PDF page is printed as "Page #0".
    self.assertEqual(page_numbers, ['0', '0', '1', '1', '1', '1', '3', '3', '3'])

def test_grouped(self) -> None:
p = GroupedMarkdownPrinter(wrap_column=80)

Expand Down

0 comments on commit 6aec786

Please sign in to comment.