From 924b97411ac5046343c85ce70d17b91df87a8117 Mon Sep 17 00:00:00 2001
From: Shubham Gupta <26436285+sh-gupta@users.noreply.github.com>
Date: Thu, 14 Nov 2024 16:18:22 +0100
Subject: [PATCH] Added get_image to DocItem and FloatingItem

Signed-off-by: Shubham Gupta <26436285+sh-gupta@users.noreply.github.com>
---
Review notes (free-form text in this area is ignored when the patch is applied):

* DocItem.get_image returns None when self.prov is empty, when doc.pages has
  no entry for self.prov[0].page_no, or when that page's size or image is None.
* Only the first provenance entry (self.prov[0]) is used; any further entries
  are not considered.
* The bbox is converted to a top-left origin and scaled by
  page_image.height / page.size.height. Only the height ratio is used, so this
  presumably assumes the stored page image keeps the page's aspect ratio --
  TODO confirm.
* FloatingItem.get_image returns self.image.pil_image when self.image is not
  None; otherwise it calls super().get_image(doc=doc).

 docling_core/types/doc/document.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py
index 598f5ee..3d5419e 100644
--- a/docling_core/types/doc/document.py
+++ b/docling_core/types/doc/document.py
@@ -551,6 +551,23 @@ def get_location_tokens(
 
         return location
 
+    def get_image(self, doc: "DoclingDocument") -> Optional[PILImage.Image]:
+        """Returns the page-image crop at prov[0]'s bbox, or None if unavailable."""
+        if not len(self.prov):
+            return None
+
+        page = doc.pages.get(self.prov[0].page_no)
+        if page is None or page.size is None or page.image is None:
+            return None
+
+        page_image = page.image.pil_image
+        crop_bbox = (
+            self.prov[0]
+            .bbox.to_top_left_origin(page_height=page.size.height)
+            .scaled(scale=page_image.height / page.size.height)
+        )
+        return page_image.crop(crop_bbox.as_tuple())
+
 
 class TextItem(DocItem):
     """TextItem."""
@@ -633,6 +650,12 @@ def caption_text(self, doc: "DoclingDocument") -> str:
             text += cap.resolve(doc).text
         return text
 
+    def get_image(self, doc: "DoclingDocument") -> Optional[PILImage.Image]:
+        """Returns the item's own image if set, else defers to super().get_image."""
+        if self.image is not None:
+            return self.image.pil_image
+        return super().get_image(doc=doc)
+
 
 class PictureItem(FloatingItem):
     """PictureItem."""