From 9200a97a53d204a3b56eedcd10466a3920b11c55 Mon Sep 17 00:00:00 2001 From: Domenico Andreoli Date: Fri, 29 Nov 2024 09:46:32 +0100 Subject: [PATCH] Spin off `geneve/utils/resource.py` --- geneve/utils/__init__.py | 64 +------------------------------ geneve/utils/dirs.py | 12 ++++++ geneve/utils/resource.py | 81 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 62 deletions(-) create mode 100644 geneve/utils/resource.py diff --git a/geneve/utils/__init__.py b/geneve/utils/__init__.py index 24498f16..4d434d60 100644 --- a/geneve/utils/__init__.py +++ b/geneve/utils/__init__.py @@ -20,16 +20,15 @@ import functools import json import re -import shutil import sys -from contextlib import contextmanager from pathlib import Path from random import Random -from tempfile import mkdtemp from types import SimpleNamespace from urllib.parse import urlparse, urlunparse from . import dirs, epr +from .dirs import tempdir as tempdir +from .resource import resource as resource random = Random() wc_re = re.compile(r"\*|\?") @@ -54,65 +53,6 @@ def expand_wildcards(s, alphabet, min_star_len, max_star_len): return "".join(chars) -@contextmanager -def tempdir(): - tmpdir = mkdtemp() - try: - yield Path(tmpdir) - finally: - shutil.rmtree(tmpdir) - - -@contextmanager -def resource(uri, basedir=None, cachedir=None, cachefile=None, validate=None): - import requests - - with tempdir() as tmpdir: - uri_parts = urlparse(str(uri)) - if uri_parts.scheme.startswith("http"): - download_dir = Path(cachedir or tmpdir) - if cachedir and cachefile: - local_file = download_dir / cachefile - else: - local_file = download_dir / Path(uri_parts.path).name - if local_file.exists() and validate and not validate(local_file): - local_file.unlink() - if not local_file.exists(): - download_dir.mkdir(parents=True, exist_ok=True, mode=0o700) - with open(local_file, "wb") as f: - f.write(requests.get(uri).content) - elif uri_parts.scheme == "file": - local_file = Path(basedir or Path.cwd()) / (uri_parts.netloc + uri_parts.path) - elif uri_parts.scheme == "": - local_file = Path(basedir or Path.cwd()) / uri_parts.path - else: - raise ValueError(f"uri scheme not supported: {uri_parts.scheme}") - - if local_file.is_dir(): - tmpdir = local_file - else: - kwargs = {} - if sys.version_info >= (3, 12) and ".tar" in local_file.suffixes: - kwargs = {"filter": "data"} - try: - shutil.unpack_archive(local_file, tmpdir, **kwargs) - except shutil.ReadError: - tmpdir = local_file - else: - if local_file.parent == tmpdir: - local_file.unlink() - inner_entries = tmpdir.glob("*") - new_tmpdir = next(inner_entries) - try: - # check if there are other directories or files - _ = next(inner_entries) - except StopIteration: - # lone entry, probably a directory, let's use it as base - tmpdir = new_tmpdir - - yield tmpdir - - @functools.lru_cache def load_schema(uri, path, basedir=None): from ruamel.yaml import YAML diff --git a/geneve/utils/dirs.py b/geneve/utils/dirs.py index 0ae82e47..fca2e7ec 100644 --- a/geneve/utils/dirs.py +++ b/geneve/utils/dirs.py @@ -18,8 +18,11 @@ """System dependent notable dirs.""" import os +import shutil import sys +from contextlib import contextmanager from pathlib import Path +from tempfile import mkdtemp if sys.platform == "win32": cache = Path(os.getenv("LOCALAPPDATA")) / "Geneve" / "Cache" @@ -27,3 +30,12 @@ cache = Path.home() / "Library" / "Caches" / "geneve" else: cache = Path.home() / ".cache" / "geneve" + + +@contextmanager +def tempdir(): + tmpdir = mkdtemp() + try: + yield Path(tmpdir) + finally: + shutil.rmtree(tmpdir) diff --git a/geneve/utils/resource.py b/geneve/utils/resource.py new file mode 100644 index 00000000..18bc546e --- /dev/null +++ b/geneve/utils/resource.py @@ -0,0 +1,81 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Util functions.""" + +import shutil +import sys +from contextlib import contextmanager +from pathlib import Path +from urllib.parse import urlparse + +from .dirs import tempdir + + +def download(uri, destdir, *, basedir=None, cachedir=None, cachefile=None, validate=None): + uri_parts = urlparse(str(uri)) + if uri_parts.scheme.startswith("http"): + if cachedir and cachefile: + local_file = cachedir / cachefile + else: + local_file = Path(cachedir or destdir) / Path(uri_parts.path).name + if local_file.exists() and validate and not validate(local_file): + local_file.unlink() + if not local_file.exists(): + local_file.parent.mkdir(parents=True, exist_ok=True, mode=0o700) + with open(local_file, "wb") as f: + import requests + + f.write(requests.get(uri).content) + elif uri_parts.scheme == "file": + local_file = Path(basedir or Path.cwd()) / (uri_parts.netloc + uri_parts.path) + elif uri_parts.scheme == "": + local_file = Path(basedir or Path.cwd()) / uri_parts.path + else: + raise ValueError(f"uri scheme not supported: {uri_parts.scheme}") + return local_file + + +@contextmanager +def resource(uri, basedir=None, cachedir=None, cachefile=None, validate=None): + + with tempdir() as tmpdir: + local_file = download(uri, tmpdir, basedir=basedir, cachedir=cachedir, cachefile=cachefile, validate=validate) + + if local_file.is_dir(): + tmpdir = local_file + else: + kwargs = {} + if sys.version_info >= (3, 12) and ".tar" in local_file.suffixes: + kwargs = {"filter": "data"} + try: + shutil.unpack_archive(local_file, tmpdir, **kwargs) + except shutil.ReadError: + tmpdir = local_file + else: + if local_file.parent == tmpdir: + local_file.unlink() + inner_entries = tmpdir.glob("*") + new_tmpdir = next(inner_entries) + try: + # check if there are other directories or files + _ = next(inner_entries) + except StopIteration: + # lone entry, probably a directory, let's use it as base + tmpdir = new_tmpdir + + yield tmpdir