diff --git a/pdfannots/cli.py b/pdfannots/cli.py index 9e926fe..58692af 100644 --- a/pdfannots/cli.py +++ b/pdfannots/cli.py @@ -14,6 +14,7 @@ MD_FORMAT_ARGS = frozenset([ 'condense', 'group_highlights_by_color', + 'page_number_offset', 'print_filename', 'sections', 'use_page_labels', @@ -77,6 +78,8 @@ def parse_args() -> typ.Tuple[argparse.Namespace, LAParams]: help="Emit annotations as a blockquote regardless of length.") g.add_argument("--no-page-labels", dest="use_page_labels", default=True, action="store_false", help="Ignore page labels if present, just print 1-based page numbers.") + g.add_argument("--page-number-offset", dest="page_number_offset", default=0, type=int, + help="Increase or decrease page numbers with a fixed offset.") g.add_argument("--print-filename", dest="print_filename", default=False, action="store_true", help="Print the name of each file with annotations.") g.add_argument("-w", "--wrap", dest="wrap_column", metavar="COLS", type=int, diff --git a/pdfannots/printer/markdown.py b/pdfannots/printer/markdown.py index cf790dc..3c248ae 100644 --- a/pdfannots/printer/markdown.py +++ b/pdfannots/printer/markdown.py @@ -86,12 +86,14 @@ def __init__( self, *, condense: bool = True, # Permit use of the condensed format + page_number_offset: int = 0, # Page number offset print_filename: bool = False, # Whether to print file names remove_hyphens: bool = True, # Whether to remove hyphens across a line break use_page_labels: bool = True, # Whether to use page labels wrap_column: typ.Optional[int] = None, # Column at which output is word-wrapped **kwargs: typ.Any # Other args, ignored ) -> None: + self.page_number_offset = page_number_offset self.print_filename = print_filename self.remove_hyphens = remove_hyphens self.use_page_labels = use_page_labels @@ -140,10 +142,13 @@ def print_file( def format_pos( pos: Pos, document: Document, - use_page_label: bool + use_page_label: bool, + page_number_offset: int ) -> str: - result = pos.page.format_name(use_label=use_page_label).title() + result = pos.page.format_name( + use_label=use_page_label, + page_number_offset=page_number_offset).title() o = document.nearest_outline(pos) if o: @@ -230,7 +235,7 @@ def format_annot( # compute the formatted position (and extra bit if needed) as a label assert annot.pos is not None - label = self.format_pos(annot.pos, document, self.use_page_labels) + \ + label = self.format_pos(annot.pos, document, self.use_page_labels, self.page_number_offset) + \ (" " + extra if extra else "") + ":" # If we have short (few words) text with a short or no comment, and the diff --git a/pdfannots/types.py b/pdfannots/types.py index a7b7531..b61fba4 100644 --- a/pdfannots/types.py +++ b/pdfannots/types.py @@ -133,12 +133,12 @@ def __repr__(self) -> str: def __str__(self) -> str: return self.format_name() - def format_name(self, use_label: bool = True) -> str: + def format_name(self, use_label: bool = True, page_number_offset: int = 0) -> str: if self.label and use_label: return 'page %s' % self.label else: # + 1 for 1-based page numbers in normal program output (error messages, etc.) - return 'page #%d' % (self.pageno + 1) + return 'page #%d' % (self.pageno + 1 + page_number_offset) def __eq__(self, other: object) -> bool: if not isinstance(other, Page): diff --git a/tests.py b/tests.py index 174b0df..9fb3363 100755 --- a/tests.py +++ b/tests.py @@ -4,6 +4,7 @@ import json import operator import pathlib +import re import typing as typ import unittest from datetime import datetime, timedelta, timezone @@ -280,6 +281,17 @@ def test_flat(self) -> None: self.assertGreater(linecount, 5) self.assertGreater(charcount, 500) + def test_flat_page_number_offset(self) -> None: + p = MarkdownPrinter(page_number_offset=-1) + + page_numbers = [] + for line in p.print_file('dummyfile', self.doc): + m = re.match(r'.+Page #([0-9])', line) + if m: + page_numbers.append(m[1]) + + self.assertEqual(page_numbers, ['0', '0', '1', '1', '1', '1', '3', '3', '3']) + def test_grouped(self) -> None: p = GroupedMarkdownPrinter(wrap_column=80)