Skip to content

Commit

Permalink
Spin off geneve/utils/resource.py
Browse files Browse the repository at this point in the history
  • Loading branch information
cavokz committed Nov 29, 2024
1 parent 43ce743 commit 9200a97
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 62 deletions.
64 changes: 2 additions & 62 deletions geneve/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,15 @@
import functools
import json
import re
import shutil
import sys
from contextlib import contextmanager
from pathlib import Path
from random import Random
from tempfile import mkdtemp
from types import SimpleNamespace
from urllib.parse import urlparse, urlunparse

from . import dirs, epr
from .dirs import tempdir as tempdir
from .resource import resource as resource

# Module-level random number generator instance.
random = Random()
# Matches a single literal "*" or "?" character
# (presumably the wildcards handled by expand_wildcards -- verify against its body).
wc_re = re.compile(r"\*|\?")
Expand All @@ -54,65 +53,6 @@ def expand_wildcards(s, alphabet, min_star_len, max_star_len):
return "".join(chars)


@contextmanager
def tempdir():
    """Create a temporary directory and remove it when the context exits.

    Yields:
        Path of the freshly created directory. The directory and all of
        its content are deleted on exit, also when the body raises.
    """
    location = Path(mkdtemp())
    try:
        yield location
    finally:
        shutil.rmtree(location)


@contextmanager
def resource(uri, basedir=None, cachedir=None, cachefile=None, validate=None):
    """Make the content of the resource at `uri` available for the duration of the context.

    Remote (http/https) resources are downloaded, local ones are used in
    place. Unless the resource is a directory, it is unpacked as an archive
    into a temporary directory that is removed when the context exits.

    Args:
        uri: location of the resource; http(s) URLs, `file:` URIs and plain
            paths (no scheme) are supported.
        basedir: directory that relative `file:` and scheme-less URIs are
            joined to; defaults to the current working directory.
        cachedir: directory where downloaded files are kept; a file already
            present there is reused instead of being downloaded again.
        cachefile: name of the file in `cachedir`; only used together with
            `cachedir`, otherwise the last component of the URI path is used.
        validate: optional callable receiving the path of an already present
            file; a false result makes the file be removed and downloaded again.

    Yields:
        - the resource itself, if it is a directory or cannot be unpacked;
        - the only entry of the unpacked archive, if it has exactly one
          top-level entry;
        - the temporary directory holding the unpacked content otherwise.

    Raises:
        ValueError: if the URI scheme is not supported.
        requests.HTTPError: if the server answers with an error status.
    """
    with tempdir() as tmpdir:
        uri_parts = urlparse(str(uri))
        if uri_parts.scheme.startswith("http"):
            download_dir = Path(cachedir or tmpdir)
            if cachedir and cachefile:
                local_file = download_dir / cachefile
            else:
                local_file = download_dir / Path(uri_parts.path).name
            if local_file.exists() and validate and not validate(local_file):
                local_file.unlink()
            if not local_file.exists():
                # imported lazily so that local resources do not need `requests`
                import requests

                # fetch before creating the file so that a failed request does
                # not leave an empty or bogus file behind in the cache
                response = requests.get(uri)
                response.raise_for_status()
                download_dir.mkdir(parents=True, exist_ok=True, mode=0o700)
                local_file.write_bytes(response.content)
        elif uri_parts.scheme == "file":
            local_file = Path(basedir or Path.cwd()) / (uri_parts.netloc + uri_parts.path)
        elif uri_parts.scheme == "":
            local_file = Path(basedir or Path.cwd()) / uri_parts.path
        else:
            raise ValueError(f"uri scheme not supported: {uri_parts.scheme}")

        if local_file.is_dir():
            tmpdir = local_file
        else:
            kwargs = {}
            # explicit extraction filter for tar archives (passed from Python 3.12 on)
            if sys.version_info >= (3, 12) and ".tar" in local_file.suffixes:
                kwargs = {"filter": "data"}
            try:
                shutil.unpack_archive(local_file, tmpdir, **kwargs)
            except shutil.ReadError:
                # not an archive, hand out the file as it is
                tmpdir = local_file
            else:
                if local_file.parent == tmpdir:
                    # the archive was downloaded next to its own content, drop it
                    local_file.unlink()
                inner_entries = tmpdir.glob("*")
                # an empty archive leaves no entries: a bare next() would raise
                # StopIteration, which surfaces as RuntimeError inside a generator
                first_entry = next(inner_entries, None)
                if first_entry is not None and next(inner_entries, None) is None:
                    # lone entry, probably a directory, let's use it as base
                    tmpdir = first_entry

        yield tmpdir


@functools.lru_cache
def load_schema(uri, path, basedir=None):
from ruamel.yaml import YAML
Expand Down
12 changes: 12 additions & 0 deletions geneve/utils/dirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,24 @@
"""System dependent notable dirs."""

import os
import shutil
import sys
from contextlib import contextmanager
from pathlib import Path
from tempfile import mkdtemp

# Per-user cache directory, chosen according to the running platform.
if sys.platform == "win32":
    # LOCALAPPDATA can be missing from a stripped-down environment; fall back
    # to its conventional location instead of failing with TypeError on import.
    cache = Path(os.getenv("LOCALAPPDATA") or Path.home() / "AppData" / "Local") / "Geneve" / "Cache"
elif sys.platform == "darwin":
    cache = Path.home() / "Library" / "Caches" / "geneve"
else:
    cache = Path.home() / ".cache" / "geneve"


@contextmanager
def tempdir():
    """Provide a scratch directory that lives only as long as the context.

    Yields:
        Path of a newly created temporary directory. On exit the whole
        directory tree is removed, whether or not the body raised.
    """
    scratch = mkdtemp()
    scratch_path = Path(scratch)
    try:
        yield scratch_path
    finally:
        shutil.rmtree(scratch)
81 changes: 81 additions & 0 deletions geneve/utils/resource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Util functions."""

import shutil
import sys
from contextlib import contextmanager
from pathlib import Path
from urllib.parse import urlparse

from .dirs import tempdir


def download(uri, destdir, *, basedir=None, cachedir=None, cachefile=None, validate=None):
    """Resolve `uri` to a local path, downloading it first when it is remote.

    Args:
        uri: location of the resource; http(s) URLs, `file:` URIs and plain
            paths (no scheme) are supported.
        destdir: directory where a remote resource is saved when no
            `cachedir` is given.
        basedir: directory that relative `file:` and scheme-less URIs are
            joined to; defaults to the current working directory.
        cachedir: directory (str or Path) where remote resources are kept; a
            file already present there is reused instead of downloaded again.
        cachefile: name of the file in `cachedir`; only used together with
            `cachedir`, otherwise the last component of the URI path is used.
        validate: optional callable receiving the path of an already present
            file; a false result makes the file be removed and downloaded again.

    Returns:
        Path of the local file or directory. For local URIs the path is only
        computed, its existence is not checked.

    Raises:
        ValueError: if the URI scheme is not supported.
        requests.HTTPError: if the server answers with an error status.
    """
    uri_parts = urlparse(str(uri))
    if uri_parts.scheme.startswith("http"):
        # always go through Path() so that `cachedir` may be a plain string
        download_dir = Path(cachedir or destdir)
        if cachedir and cachefile:
            local_file = download_dir / cachefile
        else:
            local_file = download_dir / Path(uri_parts.path).name
        if local_file.exists() and validate and not validate(local_file):
            local_file.unlink()
        if not local_file.exists():
            # imported lazily so that local resources do not need `requests`
            import requests

            # fetch before creating the file so that a failed request does
            # not leave an empty or bogus file behind in the cache
            response = requests.get(uri)
            response.raise_for_status()
            local_file.parent.mkdir(parents=True, exist_ok=True, mode=0o700)
            local_file.write_bytes(response.content)
    elif uri_parts.scheme == "file":
        local_file = Path(basedir or Path.cwd()) / (uri_parts.netloc + uri_parts.path)
    elif uri_parts.scheme == "":
        local_file = Path(basedir or Path.cwd()) / uri_parts.path
    else:
        raise ValueError(f"uri scheme not supported: {uri_parts.scheme}")
    return local_file


@contextmanager
def resource(uri, basedir=None, cachedir=None, cachefile=None, validate=None):
    """Make the content of the resource at `uri` available for the duration of the context.

    The resource is located with `download()`, to which all the arguments
    are passed through. Unless it is a directory, it is unpacked as an
    archive into a temporary directory obtained from `tempdir()`.

    Yields:
        - the resource itself, if it is a directory or cannot be unpacked;
        - the only entry of the unpacked archive, if it has exactly one
          top-level entry;
        - the temporary directory holding the unpacked content otherwise
          (also when the archive is empty).
    """
    with tempdir() as tmpdir:
        local_file = download(uri, tmpdir, basedir=basedir, cachedir=cachedir, cachefile=cachefile, validate=validate)

        if local_file.is_dir():
            tmpdir = local_file
        else:
            kwargs = {}
            # explicit extraction filter for tar archives (passed from Python 3.12 on)
            if sys.version_info >= (3, 12) and ".tar" in local_file.suffixes:
                kwargs = {"filter": "data"}
            try:
                shutil.unpack_archive(local_file, tmpdir, **kwargs)
            except shutil.ReadError:
                # not an archive, hand out the file as it is
                tmpdir = local_file
            else:
                if local_file.parent == tmpdir:
                    # the archive was downloaded next to its own content, drop it
                    local_file.unlink()
                inner_entries = tmpdir.glob("*")
                # an empty archive leaves no entries: a bare next() would raise
                # StopIteration, which surfaces as RuntimeError inside a generator
                first_entry = next(inner_entries, None)
                if first_entry is not None and next(inner_entries, None) is None:
                    # lone entry, probably a directory, let's use it as base
                    tmpdir = first_entry

        yield tmpdir

0 comments on commit 9200a97

Please sign in to comment.