Skip to content

Commit

Permalink
Merge pull request #76 from dongcartney92/patch-1
Browse files (browse the repository at this point in the history)
get_mime_type bugfix
  • Loading branch information
Filimoa authored Nov 7, 2024
2 parents 0986c20 + e997dd5 commit 421b0ef
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/openparse/text/pdfminer/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
LTTextContainer,
LTTextLine,
)
+ from pdfminer.psparser import PSLiteral
from pydantic import BaseModel, model_validator

from openparse.pdf import Pdf
Expand Down Expand Up @@ -64,8 +65,8 @@ def _extract_chars(text_line: LTTextLine) -> List[CharElement]:


def get_mime_type(pdf_object: LTImage) -> Optional[str]:
- subtype = pdf_object.stream.attrs.get("Subtype", {"name": None}).name
- filter_ = pdf_object.stream.attrs.get("Filter", {"name": None}).name
+ subtype = pdf_object.stream.attrs.get("Subtype", PSLiteral(None)).name
+ filter_ = pdf_object.stream.attrs.get("Filter", PSLiteral(None)).name
if subtype == "Image":
if filter_ == "DCTDecode":
return "image/jpeg"
Expand Down

0 comments on commit 421b0ef

Please sign in to comment.