Skip to content

Commit

Permalink
Fix tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
VikParuchuri committed Nov 19, 2024
1 parent cc08b4b commit b8301ee
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 8 deletions.
2 changes: 1 addition & 1 deletion marker/v2/providers/pdf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import atexit
import functools
from typing import Dict, List, Optional, Set, Tuple
from typing import List, Set

import pypdfium2 as pdfium
from pdftext.extraction import dictionary_output
Expand Down
12 changes: 12 additions & 0 deletions tests/processors/test_document_toc_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import pytest

from marker.v2.processors.document_toc import DocumentTOCProcessor


@pytest.mark.config({"page_range": [0]})
def test_table_processor(pdf_document, detection_model, recognition_model, table_rec_model):
    """Check the table of contents that DocumentTOCProcessor attaches to the document.

    The processor is invoked on ``pdf_document`` and the test then reads
    ``pdf_document.table_of_contents``, expecting three entries whose first
    entry carries the text "Subspace Adversarial Training".

    NOTE(review): ``detection_model``, ``recognition_model`` and
    ``table_rec_model`` are requested as fixtures but never referenced in the
    body -- presumably they are needed to build ``pdf_document``; confirm
    against the fixture definitions.
    """
    toc_processor = DocumentTOCProcessor()
    # Calling the processor on the document is what the assertions below rely on.
    toc_processor(pdf_document)

    toc_entries = pdf_document.table_of_contents
    assert len(toc_entries) == 3
    assert toc_entries[0].text == "Subspace Adversarial Training"
7 changes: 3 additions & 4 deletions tests/providers/test_pdf_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@ def test_pdf_provider(pdf_provider):
assert pdf_provider.get_image(0, 72).size == (612, 792)
assert pdf_provider.get_image(0, 96).size == (816, 1056)

page_lines = pdf_provider.get_page_spans(0)
spans_list = [span for line in page_lines for span in line.spans]
assert len(spans_list) == 93
page_lines = pdf_provider.get_page_lines(0)
assert len(page_lines) == 93

spans = spans_list[0]
spans = page_lines[0].spans
assert len(spans) == 2
assert spans[0].text == "Subspace Adversarial Training"
assert spans[0].font == "NimbusRomNo9L-Medi"
Expand Down
4 changes: 1 addition & 3 deletions tests/renderers/test_markdown_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,4 @@ def test_markdown_renderer_pagination(pdf_document):
def test_markdown_renderer_metadata(pdf_document):
renderer = MarkdownRenderer({"paginate_output": True})
metadata = renderer(pdf_document).metadata
assert "table_of_contents" in metadata

assert "Subspace Adversarial Training" in metadata["table_of_contents"][0]["title"]
assert "table_of_contents" in metadata

0 comments on commit b8301ee

Please sign in to comment.