diff --git a/docling_core/utils/file.py b/docling_core/utils/file.py index 693f7bb..7e38074 100644 --- a/docling_core/utils/file.py +++ b/docling_core/utils/file.py @@ -41,7 +41,7 @@ def resolve_file_source(source: Union[Path, AnyHttpUrl, str]) -> Path: break # otherwise, use name from URL: if fname is None: - fname = Path(http_url.path or "file").name + fname = Path(http_url.path or "").name or "file" local_path = Path(tempfile.mkdtemp()) / fname with open(local_path, "wb") as f: for chunk in res.iter_content(chunk_size=1024): # using 1-KB chunks diff --git a/test/test_utils.py b/test/test_utils.py index a27bce4..472e6af 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -7,8 +7,10 @@ import json from pydantic import Field +from requests import Response from docling_core.utils.alias import AliasModel +from docling_core.utils.file import resolve_file_source def test_alias_model(): @@ -47,3 +49,24 @@ class AliasModelGrandChild(AliasModelChild): assert obj.model_dump_json() == json.dumps(data_alias, separators=(",", ":")) assert obj.model_dump_json() != json.dumps(data, separators=(",", ":")) + + +def test_resolve_file_source_url_wout_path(monkeypatch): + expected_str = "foo" + expected_bytes = bytes(expected_str, "utf-8") + + def get_dummy_response(*args, **kwargs): + r = Response() + r.status_code = 200 + r._content = expected_bytes + return r + + monkeypatch.setattr("requests.get", get_dummy_response) + monkeypatch.setattr( + "requests.models.Response.iter_content", + lambda *args, **kwargs: [expected_bytes], + ) + path = resolve_file_source("https://pypi.org") + with open(path) as f: + text = f.read() + assert text == expected_str