Skip to content

Commit

Permalink
fix: fix resolution in case of URL without path
Browse files Browse the repository at this point in the history
Signed-off-by: Panos Vagenas <[email protected]>
  • Loading branch information
vagenas committed Oct 24, 2024
1 parent 0cd8b94 commit 72d23ea
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 1 deletion.
2 changes: 1 addition & 1 deletion docling_core/utils/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def resolve_file_source(source: Union[Path, AnyHttpUrl, str]) -> Path:
break
# otherwise, use name from URL:
if fname is None:
-fname = Path(http_url.path or "file").name
+fname = Path(http_url.path or "").name or "file"
local_path = Path(tempfile.mkdtemp()) / fname
with open(local_path, "wb") as f:
for chunk in res.iter_content(chunk_size=1024): # using 1-KB chunks
Expand Down
23 changes: 23 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import json

from pydantic import Field
from requests import Response

from docling_core.utils.alias import AliasModel
from docling_core.utils.file import resolve_file_source


def test_alias_model():
Expand Down Expand Up @@ -47,3 +49,24 @@ class AliasModelGrandChild(AliasModelChild):

assert obj.model_dump_json() == json.dumps(data_alias, separators=(",", ":"))
assert obj.model_dump_json() != json.dumps(data, separators=(",", ":"))


def test_resolve_file_source_url_wout_path(monkeypatch):
    """Resolve a URL that has no path component and verify the saved content.

    ``requests.get`` and ``Response.iter_content`` are stubbed out, so the
    test performs no network I/O: the payload handed to the resolver is the
    fixed byte string defined below.
    """
    expected_str = "foo"
    expected_bytes = expected_str.encode("utf-8")

    def get_dummy_response(*args, **kwargs):
        # Minimal successful response carrying the expected payload.
        r = Response()
        r.status_code = 200
        r._content = expected_bytes
        return r

    monkeypatch.setattr("requests.get", get_dummy_response)
    # The stubbed iterator yields the whole payload as a single chunk.
    monkeypatch.setattr(
        "requests.models.Response.iter_content",
        lambda *args, **kwargs: [expected_bytes],
    )

    # The URL deliberately has no path, so no file name can be taken from it.
    path = resolve_file_source("https://pypi.org")

    # Decode explicitly as UTF-8 (the encoding used to build the payload)
    # rather than relying on the platform's default text encoding.
    with open(path, encoding="utf-8") as f:
        text = f.read()
    assert text == expected_str

0 comments on commit 72d23ea

Please sign in to comment.