Skip to content

Commit

Permalink
switch to skip_existing and fix html format outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
iammosespaulr committed Nov 21, 2024
1 parent b98e0a3 commit 1156b68
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 6 deletions.
2 changes: 1 addition & 1 deletion convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def process_single_pdf(args):

out_folder = config_parser.get_output_folder(fpath)
base_name = config_parser.get_base_filename(fpath)
- if output_exists(out_folder, base_name):
+ if cli_options.get('skip_existing') and output_exists(out_folder, base_name):
return

try:
Expand Down
2 changes: 1 addition & 1 deletion marker/config/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def common_options(fn):
help="Path to JSON file with additional configuration.")(fn)
fn = click.option("--languages", type=str, default=None, help="Comma separated list of languages to use for OCR.")(fn)
fn = click.option("--disable_multiprocessing", is_flag=True, default=False, help="Disable multiprocessing.")(fn)
- fn = click.option('-l', is_flag=True, help="List available builders, processors and converters")(fn)
+ fn = click.option("--skip_existing", is_flag=True, default=False, help="Skip existing files.")(fn)
return fn

def generate_config_dict(self) -> Dict[str, any]:
Expand Down
3 changes: 1 addition & 2 deletions marker/schema/blocks/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,4 @@ class Form(Block):
cells: List[SpanTableCell] | None = None

def assemble_html(self, child_blocks, parent_structure=None):
- return html_format(self.cells)
-
+ return str(html_format(self.cells))
2 changes: 1 addition & 1 deletion marker/schema/blocks/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Table(Block):

def assemble_html(self, child_blocks, parent_structure=None):
if self.cells:
- return html_format(self.cells)
+ return str(html_format(self.cells))
else:
template = super().assemble_html(child_blocks, parent_structure)
return f"<p>{template}</p>"
2 changes: 1 addition & 1 deletion marker/schema/blocks/toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ class TableOfContents(Block):
cells: List[SpanTableCell] | None = None

def assemble_html(self, child_blocks, parent_structure=None):
- return html_format(self.cells)
+ return str(html_format(self.cells))

0 comments on commit 1156b68

Please sign in to comment.