Skip to content

Commit

Permalink
Merge pull request #32 from VikParuchuri/dev
Browse files (browse the repository at this point in the history)
Fix table bug
  • Loading branch information
VikParuchuri authored Dec 6, 2023
2 parents 19d7f37 + 4fc3bd9 commit f7734fb
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 19 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number | Diff line number | Diff line change
Expand Up @@ -73,7 +73,7 @@ First, clone the repo:
- `poetry shell` to activate your poetry venv
- Update pytorch since poetry doesn't play nicely with it
- GPU only: run `pip install torch` to install other torch dependencies.
- CPU only: Uninstall torch, then follow the [CPU install](https://pytorch.org/get-started/locally/) instructions.
- CPU only: Uninstall torch with `poetry remove torch`, then follow the [CPU install](https://pytorch.org/get-started/locally/) instructions.

## Mac

Expand Down
7 changes: 3 additions & 4 deletions marker/cleaners/table.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,25 +8,24 @@


def merge_table_blocks(blocks: List[Page]):
last_block = None
current_lines = []
current_bbox = None
for page in blocks:
new_page_blocks = []
pnum = page.pnum
for block in page.blocks:
if block.most_common_block_type() != "Table":
if len(current_lines) > 0:
new_block = Block(
lines=deepcopy(current_lines),
pnum=last_block.pnum,
pnum=pnum,
bbox=current_bbox
)
new_page_blocks.append(new_block)
current_lines = []
current_bbox = None

new_page_blocks.append(block)
last_block = block
continue

current_lines.extend(block.lines)
Expand All @@ -38,7 +37,7 @@ def merge_table_blocks(blocks: List[Page]):
if len(current_lines) > 0:
new_block = Block(
lines=deepcopy(current_lines),
pnum=last_block.pnum,
pnum=pnum,
bbox=current_bbox
)
new_page_blocks.append(new_block)
Expand Down
4 changes: 2 additions & 2 deletions marker/debug/data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,7 +11,7 @@


def dump_nougat_debug_data(doc, images, converted_spans):
if not settings.DEBUG or not settings.DEBUG_DATA_FOLDER:
if not settings.DEBUG_DATA_FOLDER:
return

# We attempted one conversion per image
Expand Down Expand Up @@ -41,7 +41,7 @@ def dump_nougat_debug_data(doc, images, converted_spans):


def dump_bbox_debug_data(doc, blocks: List[Page]):
if not settings.DEBUG or not settings.DEBUG_DATA_FOLDER:
if not settings.DEBUG_DATA_FOLDER:
return

# Remove extension from doc name
Expand Down
12 changes: 0 additions & 12 deletions requirements.txt

This file was deleted.

0 comments on commit f7734fb

Please sign in to comment.