From 514b3b5b36e011b70a3b7329592500c6d396bbbb Mon Sep 17 00:00:00 2001 From: Alexander Goscinski Date: Tue, 19 Nov 2024 18:53:37 +0100 Subject: [PATCH] Add `nodes/full_types` endpoint to REST API The logic is mainly copied from aiida-core with slight adaptations to resolve type checker errors. --- aiida_restapi/identifiers.py | 284 +++++++++++++++++++++++++++++- aiida_restapi/routers/nodes.py | 8 + docs/source/user_guide/graphql.md | 4 +- tests/test_nodes.py | 89 ++++++++++ 4 files changed, 381 insertions(+), 4 deletions(-) diff --git a/aiida_restapi/identifiers.py b/aiida_restapi/identifiers.py index e00a466..639a508 100644 --- a/aiida_restapi/identifiers.py +++ b/aiida_restapi/identifiers.py @@ -31,10 +31,16 @@ """ -from typing import Any +from __future__ import annotations + +from collections.abc import MutableMapping +from typing import TYPE_CHECKING, Any, Iterator, Optional, Union from aiida.common.escaping import escape_for_sql_like +if TYPE_CHECKING: + from aiida.engine.processes.ports import PortNamespace + FULL_TYPE_CONCATENATOR = '|' LIKE_OPERATOR_CHARACTER = '%' DEFAULT_NAMESPACE_LABEL = '~no-entry-point~' @@ -168,3 +174,279 @@ def load_entry_point_from_full_type(full_type: str) -> Any: # Which means it is most likely a full module path (the fallback option) and we cannot necessarily load the # class from this. We could try with `importlib` but not sure that we should raise EntryPointError('entry point of the given full type cannot be loaded') + + +class Namespace(MutableMapping): + """Namespace that can be used to map the node class hierarchy.""" + + namespace_separator = '.' + + # Very ugly ad-hoc mapping of `path` to `label` for the non-leaf entries in the nested `Namespace` mapping: + mapping_path_to_label: dict[Union[str, None], str] = { + 'node': 'Node', + 'node.data': 'Data', + 'node.process': 'Process', + 'node.process.calculation': 'Calculation', + 'node.process.calculation.calcjob': 'Calculation job', + 'node.process.calculation.calcfunction': 'Calculation function', + 'node.process.workflow': 'Workflow', + 'node.process.workflow.workchain': 'Work chain', + 'node.process.workflow.workfunction': 'Work function', + } + + # This is a hard-coded mapping to generate the correct full types for process node namespaces of external + # plugins. The `node_type` in that case is fixed and the `process_type` should start with the entry point group + # followed by the plugin name and the wildcard. + + process_full_type_mapping = { + 'process.calculation.calcjob.': 'process.calculation.calcjob.CalcJobNode.|aiida.calculations:{plugin_name}.%', + 'process.calculation.calcfunction.': 'process.calculation.calcfunction.CalcFunctionNode.|aiida.calculations:{plugin_name}.%', # noqa: E501 + 'process.workflow.workfunction.': 'process.workflow.workfunction.WorkFunctionNode.|aiida.workflows:{plugin_name}.%', # noqa: E501 + 'process.workflow.workchain.': 'process.workflow.workchain.WorkChainNode.|aiida.workflows:{plugin_name}.%', + } + + process_full_type_mapping_unplugged = { + 'process.calculation.calcjob.': 'process.calculation.calcjob.CalcJobNode.|{plugin_name}.%', + 'process.calculation.calcfunction.': 'process.calculation.calcfunction.CalcFunctionNode.|{plugin_name}.%', + 'process.workflow.workfunction.': 'process.workflow.workfunction.WorkFunctionNode.|{plugin_name}.%', + 'process.workflow.workchain.': 'process.workflow.workchain.WorkChainNode.|{plugin_name}.%', + } + + def __str__(self) -> str: + import json + + return json.dumps(self.get_description(), sort_keys=True, indent=4) + + def __init__( + self, + namespace: str, + path: Optional[str] = None, + label: Optional[str] = None, + full_type: Optional[str] = None, + counter: Optional[int] = None, + is_leaf: bool = True, + ): + """Construct a new node class namespace.""" + self._namespace = namespace + self._path = path if path is not None else namespace + self._full_type = self._infer_full_type(full_type) + self._subspaces: dict[str, PortNamespace] = {} + self._is_leaf = is_leaf + self._counter = counter + + self._label: str + if label is not None: + self._label = label + else: + self._label = self.mapping_path_to_label.get(path, self._path.rpartition('.')[-1]) + + # Manual override for process subspaces that contain entries corresponding to nodes with "unregistered" process + # types. In this case, the label should become `Unregistered` and the full type set to `None` because we cannot + # query for all nodes that fall under this category. + if namespace == DEFAULT_NAMESPACE_LABEL: + self._label = 'Unregistered' + self._full_type = None + + def _infer_full_type(self, full_type: Union[str, None]) -> Union[str, None]: + """Infer the full type based on the current namespace path and the given full type of the leaf.""" + from aiida.common.utils import strip_prefix + + if full_type or self._path is None: + return full_type + + full_type_ = strip_prefix(self._path, 'node.') + + if full_type_.startswith('process.'): + for basepath, full_type_template in self.process_full_type_mapping.items(): + if full_type_.startswith(basepath): + plugin_name = strip_prefix(full_type_, basepath) + if plugin_name.startswith(DEFAULT_NAMESPACE_LABEL): + temp_type_template = self.process_full_type_mapping_unplugged[basepath] + plugin_name = strip_prefix(plugin_name, DEFAULT_NAMESPACE_LABEL + '.') + full_type_ = temp_type_template.format(plugin_name=plugin_name) + else: + full_type_ = full_type_template.format(plugin_name=plugin_name) + return full_type_ + + full_type_ += f'.{LIKE_OPERATOR_CHARACTER}{FULL_TYPE_CONCATENATOR}' + + if full_type_.startswith('process.'): + full_type_ += LIKE_OPERATOR_CHARACTER + + return full_type_ + + def __iter__(self) -> Iterator[PortNamespace]: + return self._subspaces.__iter__() + + def __len__(self) -> int: + return len(self._subspaces) + + def __delitem__(self, key: str) -> None: + del self._subspaces[key] + + def __getitem__(self, key: str) -> PortNamespace: + return self._subspaces[key] + + def __setitem__(self, key: str, port: PortNamespace) -> None: + self._subspaces[key] = port + + @property + def is_leaf(self) -> bool: + return self._is_leaf + + def get_description(self) -> dict: + """Return a dictionary with a description of the ports this namespace contains. + + Nested PortNamespaces will be properly recursed and Ports will print their properties in a list + + :returns: a dictionary of descriptions of the Ports contained within this PortNamespace + """ + result: dict[str, Any] = { + 'namespace': self._namespace, + 'full_type': self._full_type, + 'label': self._label, + 'path': self._path, + 'subspaces': [], + } + + for _, port in self._subspaces.items(): + subspace_result = port.get_description() + result['subspaces'].append(subspace_result) + if 'counter' in subspace_result: + if self._counter is None: + self._counter = 0 + self._counter = self._counter + subspace_result['counter'] + + if self._counter is not None: + result['counter'] = self._counter + + return result + + def create_namespace(self, name: str, **kwargs: Any) -> 'Namespace': + """Create and return a new `Namespace` in this `Namespace`. + + If the name is namespaced, the sub `Namespaces` will be created recursively, except if one of the namespaces is + already occupied at any level by a Port in which case a ValueError will be thrown + + :param name: name (potentially namespaced) of the port to create and return + :param kwargs: constructor arguments that will be used *only* for the construction of the terminal Namespace + :returns: Namespace + :raises: ValueError if any sub namespace is occupied by a non-Namespace port + """ + if not isinstance(name, str): + raise ValueError(f'name has to be a string type, not {type(name)}') + + if not name: + raise ValueError('name cannot be an empty string') + + namespace = name.split(self.namespace_separator) + port_name = namespace.pop(0) + + path = f'{self._path}{self.namespace_separator}{port_name}' + + # If this is True, the (sub) port namespace does not yet exist, so we create it + if port_name not in self: + # If there still is a `namespace`, we create a sub namespace, *without* the constructor arguments + if namespace: + self[port_name] = self.__class__(port_name, path=path, is_leaf=False) + + # Otherwise it is the terminal port and we construct *with* the keyword arugments + else: + kwargs['is_leaf'] = True + self[port_name] = self.__class__(port_name, path=path, **kwargs) + + # The port does already exist: if it is a leaf and `namespace` is not empty, then the current leaf node is + # also a namespace itself, so create a namespace with the same name and put the leaf within itself + elif self[port_name].is_leaf and namespace: + clone = self[port_name] + self[port_name] = self.__class__(port_name, path=path, is_leaf=False) + self[port_name][port_name] = clone + + # If the current existing port is not a leaf and we do not have remaining namespace, that means the current + # namespace is the "concrete" version of the namespace, so we add the leaf version to the namespace. + elif not self[port_name].is_leaf and not namespace: + kwargs['is_leaf'] = True + self[port_name][port_name] = self.__class__(port_name, path=f'{path}.{port_name}', **kwargs) + + # If there is still `namespace` left, we create the next namespace + if namespace: + kwargs['is_leaf'] = True + return self[port_name].create_namespace(self.namespace_separator.join(namespace), **kwargs) + + return self[port_name] + + +def get_node_namespace(user_pk: Optional[int] = None, count_nodes: Optional[int] = False) -> 'Namespace': + """Return the full namespace of all available nodes in the current database. + + :return: complete node `Namespace` + """ + from aiida import orm + from aiida.plugins.entry_point import is_valid_entry_point_string, parse_entry_point_string + + filters = {} + if user_pk is not None: + filters['user_id'] = user_pk + + builder = orm.QueryBuilder().append(orm.Node, filters=filters, project=['node_type', 'process_type']).distinct() + + # All None instances of process_type are turned into '' + unique_types = {(node_type, process_type if process_type else '') for node_type, process_type in builder.all()} + + # First we create a flat list of all "leaf" node types. + namespaces = [] + + for node_type, process_type in unique_types: + label = None + counter = None + namespace = None + + if process_type: + # Only process nodes + parts = node_type.rsplit('.', 2) + if is_valid_entry_point_string(process_type): + _, entry_point_name = parse_entry_point_string(process_type) + label = entry_point_name.rpartition('.')[-1] + namespace = '.'.join(parts[:-2] + [entry_point_name]) + else: + label = process_type.rsplit('.', 1)[-1] + namespace = '.'.join(parts[:-2] + [DEFAULT_NAMESPACE_LABEL, process_type]) + + else: + # Data nodes and process nodes without process type (='' or =None) + parts = node_type.rsplit('.', 2) + try: + label = parts[-2] + namespace = '.'.join(parts[:-2]) + except IndexError: + continue + + if count_nodes: + builder = orm.QueryBuilder() + concat_filters = [{'node_type': {'==': node_type}}] + + if node_type.startswith('process.'): + if process_type: + concat_filters.append({'process_type': {'==': process_type}}) + else: + concat_filters.append({'process_type': {'or': [{'==': ''}, {'==': None}]}}) + + if user_pk: + concat_filters.append({'user_id': {'==': user_pk}}) + + if len(concat_filters) == 1: + builder.append(orm.Node, filters=concat_filters[0]) + else: + builder.append(orm.Node, filters={'and': concat_filters}) + + counter = builder.count() + + full_type = construct_full_type(node_type, process_type) + namespaces.append((namespace, label, full_type, counter)) + + node_namespace = Namespace('node') + + for namespace, label, full_type, counter in sorted(namespaces, key=lambda x: x[0], reverse=False): + node_namespace.create_namespace(namespace, label=label, full_type=full_type, counter=counter) + + return node_namespace diff --git a/aiida_restapi/routers/nodes.py b/aiida_restapi/routers/nodes.py index 5f17a4e..0241a32 100644 --- a/aiida_restapi/routers/nodes.py +++ b/aiida_restapi/routers/nodes.py @@ -15,6 +15,7 @@ from pydantic import ValidationError from aiida_restapi import models, resources +from aiida_restapi.identifiers import get_node_namespace from .auth import get_current_active_user @@ -85,6 +86,13 @@ def stream() -> Generator[bytes, None, None]: ) +@router.get('/nodes/full_types', response_model=dict[str, Any]) +@with_dbenv() +async def get_full_types() -> dict[str, Any]: + """Return full_types of the nodes""" + return get_node_namespace(user_pk=None, count_nodes=False).get_description() + + @router.get('/nodes/{nodes_id}', response_model=models.Node) @with_dbenv() async def read_node(nodes_id: int) -> Optional[models.Node]: diff --git a/docs/source/user_guide/graphql.md b/docs/source/user_guide/graphql.md index 8a1eed4..b8b712b 100644 --- a/docs/source/user_guide/graphql.md +++ b/docs/source/user_guide/graphql.md @@ -369,9 +369,7 @@ http://localhost:5000/api/v4/nodes?attributes=true&attributes_filter=pbc1 http://localhost:5000/api/v4/nodes/full_types ``` -NOT YET SPECIFICALLY IMPLEMENTED -(although this needs further investigation, because full types is basically not documented anywhere) - +Not implemented for GraphQL, please use the REST API for this use case. ```html http://localhost:5000/api/v4/nodes/download_formats diff --git a/tests/test_nodes.py b/tests/test_nodes.py index 72c8444..06b1771 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -351,3 +351,92 @@ async def test_get_download_node(array_data_node, async_client): response = await async_client.get(f'/nodes/{array_data_node.pk}/download') assert response.status_code == 422, response.json() assert 'Please specify the download format' in response.json()['detail'] + + +def test_get_full_types(default_computers, example_processes, default_groups, default_nodes, client): + """Test get full_types nodes.""" + response = client.get('/nodes/full_types') + + assert response.status_code == 200, response.json() + assert response.json() == { + 'full_type': 'node.%|', + 'label': 'node', + 'namespace': 'node', + 'path': 'node', + 'subspaces': [ + { + 'full_type': 'data.%|', + 'label': 'Data', + 'namespace': 'data', + 'path': 'node.data', + 'subspaces': [ + { + 'full_type': 'data.core.%|', + 'label': 'core', + 'namespace': 'core', + 'path': 'node.data.core', + 'subspaces': [ + { + 'full_type': 'data.core.bool.Bool.|', + 'label': 'Bool', + 'namespace': 'bool', + 'path': 'node.data.core.bool', + 'subspaces': [], + }, + { + 'full_type': 'data.core.float.Float.|', + 'label': 'Float', + 'namespace': 'float', + 'path': 'node.data.core.float', + 'subspaces': [], + }, + { + 'full_type': 'data.core.int.Int.|', + 'label': 'Int', + 'namespace': 'int', + 'path': 'node.data.core.int', + 'subspaces': [], + }, + { + 'full_type': 'data.core.str.Str.|', + 'label': 'Str', + 'namespace': 'str', + 'path': 'node.data.core.str', + 'subspaces': [], + }, + ], + } + ], + }, + { + 'full_type': 'process.%|%', + 'label': 'Process', + 'namespace': 'process', + 'path': 'node.process', + 'subspaces': [ + { + 'full_type': 'process.workflow.%|%', + 'label': 'Workflow', + 'namespace': 'workflow', + 'path': 'node.process.workflow', + 'subspaces': [ + { + 'full_type': 'process.workflow.workchain.WorkChainNode.|', + 'label': 'WorkChainNode', + 'namespace': 'workchain', + 'path': 'node.process.workflow.workchain', + 'subspaces': [], + }, + { + 'full_type': 'process.workflow.workfunction.WorkFunctionNode.|', + 'label': 'WorkFunctionNode', + 'namespace': 'workfunction', + 'path': 'node.process.workflow.workfunction', + 'subspaces': [], + }, + ], + } + ], + }, + ], + }