Skip to content

Commit

Permalink
CLI: use custom PathOrUrl for aiida-pseudo install family (#80)
Browse files Browse the repository at this point in the history
Instead of using the `PathOrUrl` parameter type from `aiida-core` we
add our own implementation that uses the `requests` library instead of
`urllib`, but most importantly, it attempts to retrieve the URL in the
`attempt` context manager that will catch any exceptions and properly
display the error and exit the command, just like other important parts
of the command that can fail.

In addition, the `PseudoPotentialFamilyTypeParam` is updated to take an
`exclude` list at construction time to signify a list of values that are
considered to be invalid. We use this in the `install family` command to
blacklist the `pseudo.family.sssp` and `pseudo.family.pseudo_dojo` entry
points, since they have their own dedicated install commands that should
be used instead.

Finally, the shorthand of the `--archive-format` option is changed from
`-F` to `-f`, which frees up `-F` for the `--family-type` option. That
makes more sense there given that we have `-P/--pseudo-type`. The
`--archive-format` option was only in use by `install family`, which was
broken anyway, so the breaking change shouldn't affect any users.
  • Loading branch information
sphuber committed May 4, 2021
1 parent 25a5eda commit c53d623
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 57 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,18 +154,18 @@ aiida-pseudo install family <ARCHIVE> <LABEL>
```
where `<ARCHIVE>` should be replaced with the pseudopotential archive and `<LABEL>` with the label to give to the family.
The command will attempt to automatically detect the compression format of the archive.
If this fails, you can specify the format manually with the `--archive-format/-F` option, for example, for a `.tar.gz` archive:
If this fails, you can specify the format manually with the `--archive-format/-f` option, for example, for a `.tar.gz` archive:
```
aiida-pseudo install family <ARCHIVE> <LABEL> -F gztar
aiida-pseudo install family <ARCHIVE> <LABEL> -f gztar
```
By default, the command will create a family of the base pseudopotential family type.
If you want to create a more specific family, for example an `SsspFamily`, or a `PseudoDojoFamily`, you can provide the corresponding entry point to the `--family-type/-T` option:
If you want to create a more specific family, for example a `CutoffsPseudoPotentialFamily`, you can provide the corresponding entry point to the `--family-type/-F` option:
```
aiida-pseudo install family <ARCHIVE> <LABEL> -T pseudo.family.sssp
aiida-pseudo install family <ARCHIVE> <LABEL> -F pseudo.family.sssp
```
or
```
aiida-pseudo install family <ARCHIVE> <LABEL> -T pseudo.family.pseudo_dojo
aiida-pseudo install family <ARCHIVE> <LABEL> -F pseudo.family.pseudo_dojo
```
The available pseudopotential family classes can be listed with the command:
```
Expand Down
102 changes: 56 additions & 46 deletions aiida_pseudo/cli/install.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
# -*- coding: utf-8 -*-
"""Command to install a pseudo potential family."""
import json
import os
import pathlib
import shutil
import tempfile
import urllib.request

import click
import requests

from aiida.cmdline.utils import decorators, echo
from aiida.cmdline.params import options as options_core
from aiida.cmdline.params import types

from aiida_pseudo.groups.family import PseudoDojoConfiguration, SsspConfiguration
from .params import options
from .params import options, types
from .root import cmd_root


Expand All @@ -28,54 +26,70 @@ def cmd_install():
@click.argument('label', type=click.STRING)
@options_core.DESCRIPTION(help='Description for the family.')
@options.ARCHIVE_FORMAT()
@options.FAMILY_TYPE()
@options.FAMILY_TYPE(
type=types.PseudoPotentialFamilyTypeParam(exclude=('pseudo.family.sssp', 'pseudo.family.pseudo_dojo'))
)
@options.PSEUDO_TYPE()
@options.TRACEBACK()
@decorators.with_dbenv()
def cmd_install_family(archive, label, description, archive_format, family_type, pseudo_type, traceback): # pylint: disable=too-many-arguments
"""Install a standard pseudo potential family from a FOLDER or an ARCHIVE (on the local file system or from a URL).
"""Install a standard pseudopotential family from an ARCHIVE.
The command will attempt first to recognize the passed ARCHIVE_FOLDER as a folder in the local system. If not,
`archive` is assumed to be an archive and the command will attempt to infer the archive format from the
filename extension of the ARCHIVE. If this fails, the archive format can be specified explicitly with the archive
format option, which will also display which formats are supported.
The ARCHIVE can be a (compressed) archive of a directory containing the pseudopotentials on the local file system or
provided by an HTTP URL. Alternatively, it can be a normal directory on the local file system. The (unarchived)
directory should only contain the pseudopotential files and they cannot be in any subdirectory. In addition,
depending on the chosen pseudopotential type (see the option `-P/--pseudo-type`) there can be additional
requirements on the pseudopotential file and filename format.
By default, the command will create a base `PseudoPotentialFamily`, but the type can be changed with the pseudos
type option. If the base type is used, the pseudo potential files in the archive *have* to have filenames that
strictly follow the format `ELEMENT.EXTENSION`, because otherwise the element cannot be determined automatically.
If the ARCHIVE corresponds to a (compressed) archive, the command will attempt to infer the archive format from the
filename extension of the ARCHIVE. If this fails, the archive format can be specified explicitly with the archive
format option `-f/--archive-format`, which will also display which formats are supported. These format suffixes
follow the naming of the `shutil.unpack_archive` standard library method.
Once the ARCHIVE is downloaded, uncompressed and unarchived into a directory on the local file system, the command
will create a `PseudoPotentialFamily` instance where the type of the pseudopotential data nodes that are stored
within it is set through the `-P/--pseudo-type` option. If the default, `pseudo` (which corresponds to the data
plugin `PseudoPotentialData`), is used, the pseudopotential files in the archive *have* to have filenames that
strictly follow the format `ELEMENT.EXTENSION`, or the creation of the family will fail. This is because for the
default pseudopotential type, the format of the file is unknown and the family requires the element to be known,
which in this case can then only be parsed from the filename.
The pseudopotential family type that is created can also be changed with the `-F/--family-type` option. Note,
however, that not all values are accepted. For example, the `pseudo.family.sssp` and `pseudo.family.pseudo_dojo` are
blacklisted since they have their own dedicated commands in `install sssp` and `install pseudo-dojo`, respectively.
"""
from .utils import attempt, create_family_from_archive

# `archive` is a simple string, containing the name of the folder / file / url location.

if pathlib.Path(archive).is_dir():
try:
if isinstance(archive, pathlib.Path) and archive.is_dir():
with attempt(f'creating a pseudopotential family from directory `{archive}`...', include_traceback=traceback):
family = family_type.create_from_folder(archive, label, pseudo_type=pseudo_type)
except ValueError as exception:
raise OSError(f'failed to parse pseudos from `{archive}`: {exception}') from exception
elif pathlib.Path(archive).is_file():
elif isinstance(archive, pathlib.Path) and archive.is_file():
with attempt('unpacking archive and parsing pseudos... ', include_traceback=traceback):
family = create_family_from_archive(
family_type, label, pathlib.Path(archive), fmt=archive_format, pseudo_type=pseudo_type
family_type, label, archive, fmt=archive_format, pseudo_type=pseudo_type
)
else:
# The file of the url must be copied to a local temporary file. Maybe better ways to do it?
# The `create_family_from_archive` does currently not accept filelike objects because the underlying
# `shutil.unpack_archive` does not. Likewise, `unpack_archive` will attempt to deduce the archive format
# from the filename extension, so it is important we maintain the original filename.
# Of course if this fails, users can specify the archive format explicitly with the corresponding option.
with urllib.request.urlopen(archive) as handle:
suffix = os.path.basename(handle.url)
with tempfile.NamedTemporaryFile(mode='w+b', suffix=suffix) as handle:
shutil.copyfileobj(handle, handle)
handle.flush()
with attempt('unpacking archive and parsing pseudos... ', include_traceback=traceback):
family = create_family_from_archive(
family_type, label, pathlib.Path(handle.name), fmt=archive_format, pseudo_type=pseudo_type
)
# At this point, we can assume that it is not a valid filepath on disk, but rather a URL and the ``archive``
# variable will contain the response object from the ``requests`` library. The validation of the URL will already
# have been done by the ``PathOrUrl`` parameter type, so the URL is reachable. The content of the URL must be
# copied to a local temporary file because `create_family_from_archive` does currently not accept filelike
# objects, because in turn the underlying `shutil.unpack_archive` does not. In addition, `unpack_archive` will
# attempt to deduce the archive format from the filename extension, so it is important we maintain the original
# filename. Of course if this fails, users can specify the archive format explicitly with the corresponding
# option. We get the filename by converting the URL to a ``Path`` object and taking the filename, using that as
# a suffix for the temporary file that is generated on disk to copy the content to.
suffix = pathlib.Path(archive.url).name
with tempfile.NamedTemporaryFile(mode='w+b', suffix=suffix) as handle:
handle.write(archive.content)
handle.flush()

with attempt('unpacking archive and parsing pseudos... ', include_traceback=traceback):
family = create_family_from_archive(
family_type, label, pathlib.Path(handle.name), fmt=archive_format, pseudo_type=pseudo_type
)

family.description = description
echo.echo_success(f'installed `{label}` containing {family.count()} pseudo potentials')
echo.echo_success(f'installed `{label}` containing {family.count()} pseudopotentials')


def download_sssp(
Expand All @@ -91,23 +105,21 @@ def download_sssp(
:param filepath_metadata: absolute filepath to write the metadata file to.
:param traceback: boolean, if true, print the traceback when an exception occurs.
"""
import requests

from aiida_pseudo.groups.family import SsspFamily
from .utils import attempt

url_sssp_base = 'https://legacy-archive.materialscloud.org/file/2018.0001/v4/'
url_archive = f"{url_sssp_base}/{SsspFamily.format_configuration_filename(configuration, 'tar.gz')}"
url_metadata = f"{url_sssp_base}/{SsspFamily.format_configuration_filename(configuration, 'json')}"

with attempt('downloading selected pseudo potentials archive... ', include_traceback=traceback):
with attempt('downloading selected pseudopotentials archive... ', include_traceback=traceback):
response = requests.get(url_archive)
response.raise_for_status()
with open(filepath_archive, 'wb') as handle:
handle.write(response.content)
handle.flush()

with attempt('downloading selected pseudo potentials metadata... ', include_traceback=traceback):
with attempt('downloading selected pseudopotentials metadata... ', include_traceback=traceback):
response = requests.get(url_metadata)
response.raise_for_status()
with open(filepath_metadata, 'wb') as handle:
Expand All @@ -128,23 +140,21 @@ def download_pseudo_dojo(
:param filepath_metadata: absolute filepath to write the metadata archive to.
:param traceback: boolean, if true, print the traceback when an exception occurs.
"""
import requests

from aiida_pseudo.groups.family import PseudoDojoFamily
from .utils import attempt

label = PseudoDojoFamily.format_configuration_label(configuration)
url_archive = PseudoDojoFamily.get_url_archive(label)
url_metadata = PseudoDojoFamily.get_url_metadata(label)

with attempt('downloading selected pseudo potentials archive... ', include_traceback=traceback):
with attempt('downloading selected pseudopotentials archive... ', include_traceback=traceback):
response = requests.get(url_archive)
response.raise_for_status()
with open(filepath_archive, 'wb') as handle:
handle.write(response.content)
handle.flush()

with attempt('downloading selected pseudo potentials metadata archive... ', include_traceback=traceback):
with attempt('downloading selected pseudopotentials metadata archive... ', include_traceback=traceback):
response = requests.get(url_metadata)
response.raise_for_status()
with open(filepath_metadata, 'wb') as handle:
Expand Down Expand Up @@ -225,7 +235,7 @@ def cmd_install_sssp(version, functional, protocol, download_only, traceback):
family.description = description
family.set_cutoffs(cutoffs, 'normal', unit='Ry')

echo.echo_success(f'installed `{label}` containing {family.count()} pseudo potentials')
echo.echo_success(f'installed `{label}` containing {family.count()} pseudopotentials')


@cmd_install.command('pseudo-dojo')
Expand Down Expand Up @@ -348,4 +358,4 @@ def cmd_install_pseudo_dojo(
family.set_cutoffs(cutoff_values, stringency, unit='Eh')
family.set_default_stringency(default_stringency)

echo.echo_success(f'installed `{label}` containing {family.count()} pseudo potentials')
echo.echo_success(f'installed `{label}` containing {family.count()} pseudopotentials')
4 changes: 2 additions & 2 deletions aiida_pseudo/cli/params/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
)

FAMILY_TYPE = OverridableOption(
'-T',
'-F',
'--family-type',
type=PseudoPotentialFamilyTypeParam(),
default='pseudo.family',
Expand All @@ -82,7 +82,7 @@
)

ARCHIVE_FORMAT = OverridableOption(
'-F', '--archive-format', type=click.Choice([fmt[0] for fmt in shutil.get_archive_formats()])
'-f', '--archive-format', type=click.Choice([fmt[0] for fmt in shutil.get_archive_formats()])
)

UNIT = OverridableOption(
Expand Down
40 changes: 40 additions & 0 deletions aiida_pseudo/cli/params/types.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use
"""Custom parameter types for command line interface commands."""
import pathlib
import typing

import click
import requests

from aiida.cmdline.params.types import GroupParamType
from ..utils import attempt

__all__ = ('PseudoPotentialFamilyTypeParam', 'PseudoPotentialFamilyParam', 'PseudoPotentialTypeParam')

Expand Down Expand Up @@ -57,6 +62,14 @@ class PseudoPotentialFamilyTypeParam(click.ParamType):

name = 'pseudo_family_type'

def __init__(self, exclude: typing.Optional[typing.Sequence[str]] = None, **kwargs):
    """Construct the parameter.

    :param exclude: an optional sequence of values that should be considered invalid and will raise ``BadParameter``.
    :param kwargs: additional keyword arguments that are passed on to the parent constructor.
    """
    super().__init__(**kwargs)
    # Stored exactly as given; a falsy value (``None`` or an empty sequence) means that nothing is excluded.
    self.exclude = exclude

def convert(self, value, _, __):
"""Convert the entry point name to the corresponding class.
Expand All @@ -73,6 +86,9 @@ def convert(self, value, _, __):
except exceptions.EntryPointError as exception:
raise click.BadParameter(f'`{value}` is not an existing group plugin.') from exception

if self.exclude and value in self.exclude:
raise click.BadParameter(f'`{value}` is not an accepted value for this option.')

if not issubclass(family_type, PseudoPotentialFamily):
raise click.BadParameter(f'`{value}` entry point is not a subclass of `PseudoPotentialFamily`.')

Expand All @@ -88,3 +104,27 @@ def complete(self, _, incomplete):
from aiida.plugins.entry_point import get_entry_point_names
entry_points = get_entry_point_names('aiida.groups')
return [(ep, '') for ep in entry_points if (ep.startswith('pseudo.family') and ep.startswith(incomplete))]


class PathOrUrl(click.Path):
    """Extension of ``click``'s ``Path``-type that also supports URLs."""

    name = 'PathOrUrl'

    def convert(self, value, param, ctx) -> typing.Union[pathlib.Path, requests.Response]:
        """Convert the string value to a local path or to the response of fetching it as a URL.

        If the parent ``click.Path`` accepts ``value``, it is returned as a ``pathlib.Path`` instance. Otherwise,
        ``value`` is treated as a URL and fetched with ``requests.get``. If the request is successful, the
        ``requests.Response`` object itself is returned (not just the raw bytes), such that the caller has access to
        both the ``url`` and the downloaded ``content``.

        :param value: the filepath on the local filesystem or a URL.
        :param param: the parameter that is being converted, passed on to ``click.Path.convert``.
        :param ctx: the current ``click`` context, passed on to ``click.Path.convert``.
        :return: a ``pathlib.Path`` if ``value`` is a valid path, otherwise the ``requests.Response`` of the download.
        """
        try:
            # Call the method of the super class, which will raise if ``value`` is not a valid path.
            return pathlib.Path(super().convert(value, param, ctx))
        except click.exceptions.BadParameter:
            # Not a valid path, so treat ``value`` as a URL. The download is performed inside the ``attempt`` context
            # manager such that any failure (connection problem, invalid URL, HTTP error status) is reported in the
            # same way as the other steps of the command that can fail.
            with attempt(f'attempting to download data from `{value}`...'):
                response = requests.get(value)
                response.raise_for_status()
                return response
8 changes: 4 additions & 4 deletions tests/cli/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_list_project(clear_db, run_cli_command, get_pseudo_family):


def test_list_filter(clear_db, run_cli_command, get_pseudo_family):
"""Test the filtering option `-T`."""
"""Test the filtering option `-F`."""
family_base = get_pseudo_family(label='Pseudo potential family', cls=PseudoPotentialFamily)
family_sssp = get_pseudo_family(label='SSSP/1.0/PBE/efficiency', cls=SsspFamily, pseudo_type=UpfData)

Expand All @@ -53,15 +53,15 @@ def test_list_filter(clear_db, run_cli_command, get_pseudo_family):
assert family_base.label in result.output
assert family_sssp.label in result.output

result = run_cli_command(cmd_list, ['--raw', '-T', 'pseudo.family.sssp'])
result = run_cli_command(cmd_list, ['--raw', '-F', 'pseudo.family.sssp'])
assert len(result.output_lines) == 1
assert family_base.label not in result.output
assert family_sssp.label in result.output


def test_list_filter_no_result(clear_db, run_cli_command, get_pseudo_family):
"""Test the filtering option `-T` for a type for which no families exist."""
"""Test the filtering option `-F` for a type for which no families exist."""
get_pseudo_family(label='Pseudo potential family', cls=PseudoPotentialFamily)

result = run_cli_command(cmd_list, ['--raw', '-T', 'pseudo.family.sssp'])
result = run_cli_command(cmd_list, ['--raw', '-F', 'pseudo.family.sssp'])
assert 'no pseudo potential families found that match the filtering criteria.' in result.output

0 comments on commit c53d623

Please sign in to comment.