diff --git a/llama_parse/base.py b/llama_parse/base.py index 867ea29..3279ba2 100644 --- a/llama_parse/base.py +++ b/llama_parse/base.py @@ -309,6 +309,11 @@ def get_json_result( def get_images(self, json_result: List[dict], download_path: str) -> List[dict]: """Download images from the parsed result.""" headers = {"Authorization": f"Bearer {self.api_key}"} + + # make the download path + if not os.path.exists(download_path): + os.makedirs(download_path) + try: images = [] for result in json_result: @@ -318,9 +323,16 @@ def get_images(self, json_result: List[dict], download_path: str) -> List[dict]: print(f"> Image for page {page['page']}: {page['images']}") for image in page["images"]: image_name = image["name"] + + # get the full path image_path = os.path.join( download_path, f"{job_id}-{image_name}" ) + + # get a valid image path + if not image_path.endswith(".png"): + image_path += ".png" + image["path"] = image_path image["job_id"] = job_id image["original_pdf_path"] = result["file_path"] diff --git a/pyproject.toml b/pyproject.toml index 5f9f620..72cf0c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "llama-parse" -version = "0.4.1" +version = "0.4.2" description = "Parse files into RAG-Optimized formats." authors = ["Logan Markewich "] license = "MIT"