Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --page-number-offset to change page numbers #90

Merged
merged 1 commit into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pdfannots/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
MD_FORMAT_ARGS = frozenset([
'condense',
'group_highlights_by_color',
'page_number_offset',
'print_filename',
'sections',
'use_page_labels',
Expand Down Expand Up @@ -77,6 +78,8 @@ def parse_args() -> typ.Tuple[argparse.Namespace, LAParams]:
help="Emit annotations as a blockquote regardless of length.")
g.add_argument("--no-page-labels", dest="use_page_labels", default=True, action="store_false",
help="Ignore page labels if present, just print 1-based page numbers.")
g.add_argument("--page-number-offset", dest="page_number_offset", default=0, type=int,
help="Increase or decrease page numbers with a fixed offset.")
g.add_argument("--print-filename", dest="print_filename", default=False, action="store_true",
help="Print the name of each file with annotations.")
g.add_argument("-w", "--wrap", dest="wrap_column", metavar="COLS", type=int,
Expand Down
14 changes: 10 additions & 4 deletions pdfannots/printer/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,14 @@ def __init__(
self,
*,
condense: bool = True, # Permit use of the condensed format
page_number_offset: int = 0, # Page number offset
print_filename: bool = False, # Whether to print file names
remove_hyphens: bool = True, # Whether to remove hyphens across a line break
use_page_labels: bool = True, # Whether to use page labels
wrap_column: typ.Optional[int] = None, # Column at which output is word-wrapped
**kwargs: typ.Any # Other args, ignored
) -> None:
self.page_number_offset = page_number_offset
self.print_filename = print_filename
self.remove_hyphens = remove_hyphens
self.use_page_labels = use_page_labels
Expand Down Expand Up @@ -140,10 +142,13 @@ def print_file(
def format_pos(
pos: Pos,
document: Document,
use_page_label: bool
use_page_label: bool,
page_number_offset: int
) -> str:

result = pos.page.format_name(use_label=use_page_label).title()
result = pos.page.format_name(
use_label=use_page_label,
page_number_offset=page_number_offset).title()

o = document.nearest_outline(pos)
if o:
Expand Down Expand Up @@ -230,8 +235,9 @@ def format_annot(

# compute the formatted position (and extra bit if needed) as a label
assert annot.pos is not None
label = self.format_pos(annot.pos, document, self.use_page_labels) + \
(" " + extra if extra else "") + ":"
label = self.format_pos(
annot.pos, document, self.use_page_labels, self.page_number_offset
) + (" " + extra if extra else "") + ":"

# If we have short (few words) text with a short or no comment, and the
# text contains no embedded full stops or quotes, then we'll just put
Expand Down
4 changes: 2 additions & 2 deletions pdfannots/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,12 @@ def __repr__(self) -> str:
def __str__(self) -> str:
# The string form is whatever format_name() returns when called with its default arguments.
return self.format_name()

def format_name(self, use_label: bool = True) -> str:
def format_name(self, use_label: bool = True, page_number_offset: int = 0) -> str:
if self.label and use_label:
return 'page %s' % self.label
else:
# + 1 for 1-based page numbers in normal program output (error messages, etc.)
return 'page #%d' % (self.pageno + 1)
return 'page #%d' % (self.pageno + 1 + page_number_offset)
0xabu marked this conversation as resolved.
Show resolved Hide resolved

def __eq__(self, other: object) -> bool:
if not isinstance(other, Page):
Expand Down
12 changes: 12 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import operator
import pathlib
import re
import typing as typ
import unittest
from datetime import datetime, timedelta, timezone
Expand Down Expand Up @@ -280,6 +281,17 @@ def test_flat(self) -> None:
self.assertGreater(linecount, 5)
self.assertGreater(charcount, 500)

def test_flat_page_number_offset(self) -> None:
# Verify the page numbers printed by a MarkdownPrinter built with page_number_offset=-1.
p = MarkdownPrinter(page_number_offset=-1)

# Collect the page-number digit from every output line that carries a "Page #<digit>" label.
page_numbers = []
for line in p.print_file('dummyfile', self.doc):
# re.match anchors at the start of the line, and ".+" requires at least one
# character before "Page #".
# NOTE(review): "([0-9])" captures exactly one digit, so a multi-digit page number
# would be recorded by its first digit only, and a number with a leading "-" would
# not match at all -- adequate for the single-digit values expected below.
m = re.match(r'.+Page #([0-9])', line)
if m:
page_numbers.append(m[1])

# The captured values are strings (regex groups), hence the quoted digits.
self.assertEqual(page_numbers, ['0', '0', '1', '1', '1', '1', '3', '3', '3'])

def test_grouped(self) -> None:
p = GroupedMarkdownPrinter(wrap_column=80)

Expand Down
Loading