From bbbae9de9df7882e4b0d97128e0e2bb6437f58eb Mon Sep 17 00:00:00 2001 From: Pierre-Loic Doulcet Date: Tue, 10 Sep 2024 10:53:53 -0700 Subject: [PATCH] do not attach a filepath when a stream of bytes is passed (#394) --- llama_parse/base.py | 11 +++++++---- pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/llama_parse/base.py b/llama_parse/base.py index 8468f40..9937963 100644 --- a/llama_parse/base.py +++ b/llama_parse/base.py @@ -416,12 +416,13 @@ async def _aget_json( job_id = await self._create_job(file_path, extra_info=extra_info) if self.verbose: print("Started parsing the file under job_id %s" % job_id) - result = await self._get_job_result(job_id, "json") result["job_id"] = job_id - result["file_path"] = file_path - return [result] + if not isinstance(file_path, (bytes, BufferedIOBase)): + result["file_path"] = str(file_path) + + return [result] except Exception as e: file_repr = file_path if isinstance(file_path, str) else "" print(f"Error while parsing the file '{file_repr}':", e) @@ -506,7 +507,9 @@ async def aget_images( image["path"] = image_path image["job_id"] = job_id - image["original_pdf_path"] = result["file_path"] + + image["original_file_path"] = result.get("file_path", None) + image["page_number"] = page["page"] with open(image_path, "wb") as f: image_url = f"{self.base_url}/api/parsing/job/{job_id}/result/image/{image_name}" diff --git a/pyproject.toml b/pyproject.toml index ef33b93..bcdacd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "llama-parse" -version = "0.5.4" +version = "0.5.5" description = "Parse files into RAG-Optimized formats." authors = ["Logan Markewich "] license = "MIT"