From 0cf81c86c72750f7261331cf12bd079d96f4fec4 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Mon, 15 May 2023 12:50:09 +0200 Subject: [PATCH 1/3] Add the `SqliteDosStorage` storage backend The implementation subclasses the `PsqlDosBackend` and replaces the PostgreSQL database with an sqlite database. By doing so, the initialization of the storage only requires a directory on the local file system where it will create the sqlite file for the database and a container for the disk-objectstore. The advantage of this `sqlite_dos` storage over the default `psql_dos` is that it doesn't require a system service like PostgreSQL. As a result, creating the storage is very straightforward and can be done with almost no setup. The advantage over the existing `sqlite_zip` is that the `sqlite_dos` is not read-only but can be used to write data as well. Combined with the `verdi profile setup` command, a working profile can be created with a single command: verdi profile setup core.sqlite_dos -n --profile name --email e@mail This makes this storage backend very useful for tutorials and demos that don't rely on performance. 
--- aiida/manage/configuration/__init__.py | 2 +- aiida/storage/__init__.py | 2 + aiida/storage/sqlite_dos/__init__.py | 15 ++ aiida/storage/sqlite_dos/backend.py | 184 ++++++++++++++++++ docs/source/nitpick-exceptions | 1 + pyproject.toml | 1 + tests/cmdline/commands/test_profile.py | 10 +- .../configuration/test_configuration.py | 8 +- 8 files changed, 215 insertions(+), 8 deletions(-) create mode 100644 aiida/storage/sqlite_dos/__init__.py create mode 100644 aiida/storage/sqlite_dos/backend.py diff --git a/aiida/manage/configuration/__init__.py b/aiida/manage/configuration/__init__.py index f5d35ebb6c..0942b849b9 100644 --- a/aiida/manage/configuration/__init__.py +++ b/aiida/manage/configuration/__init__.py @@ -208,7 +208,7 @@ def create_profile( """ from aiida.orm import User - storage_config = storage_cls.Configuration(**{k: v for k, v in kwargs.items() if v is not None}).dict() + storage_config = storage_cls.Configuration(**{k: v for k, v in kwargs.items() if v is not None}).model_dump() profile: Profile = config.create_profile(name=name, storage_cls=storage_cls, storage_config=storage_config) with profile_context(profile.name, allow_switch=True): diff --git a/aiida/storage/__init__.py b/aiida/storage/__init__.py index d10f4a7799..e96bc23623 100644 --- a/aiida/storage/__init__.py +++ b/aiida/storage/__init__.py @@ -14,8 +14,10 @@ # yapf: disable # pylint: disable=wildcard-import +from .sqlite_dos import * __all__ = ( + 'SqliteDosStorage', ) # yapf: enable diff --git a/aiida/storage/sqlite_dos/__init__.py b/aiida/storage/sqlite_dos/__init__.py new file mode 100644 index 0000000000..5e23640982 --- /dev/null +++ b/aiida/storage/sqlite_dos/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +"""Storage implementation using Sqlite database and disk-objectstore container.""" + +# AUTO-GENERATED + +# yapf: disable +# pylint: disable=wildcard-import + +from .backend import * + +__all__ = ( + 'SqliteDosStorage', +) + +# yapf: enable diff --git 
a/aiida/storage/sqlite_dos/backend.py b/aiida/storage/sqlite_dos/backend.py new file mode 100644 index 0000000000..f71ee5dcdd --- /dev/null +++ b/aiida/storage/sqlite_dos/backend.py @@ -0,0 +1,184 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +"""Storage implementation using Sqlite database and disk-objectstore container.""" +from __future__ import annotations + +from functools import cached_property +from pathlib import Path +from tempfile import mkdtemp +from typing import TYPE_CHECKING + +from disk_objectstore import Container +from pydantic import BaseModel, Field +from sqlalchemy import insert +from sqlalchemy.orm import scoped_session, sessionmaker + +from aiida.manage import Profile +from aiida.orm.implementation import BackendEntity +from aiida.storage.psql_dos.models.settings import DbSetting +from aiida.storage.sqlite_zip import models, orm +from aiida.storage.sqlite_zip.migrator import get_schema_version_head +from aiida.storage.sqlite_zip.utils import create_sqla_engine + +from ..psql_dos import PsqlDosBackend +from ..psql_dos.migrator import REPOSITORY_UUID_KEY, PsqlDosMigrator + +if TYPE_CHECKING: + from aiida.repository.backend import DiskObjectStoreRepositoryBackend + +__all__ = ('SqliteDosStorage',) + + +class SqliteDosMigrator(PsqlDosMigrator): + """Storage implementation using Sqlite database and disk-objectstore container. + + This storage backend is not recommended for use in production. 
The sqlite database is not the most performant and it + does not support all the ``QueryBuilder`` functionality that is supported by the ``core.psql_dos`` storage backend. + This storage is ideally suited for use cases that want to test or demo AiiDA as it requires no server but just a + folder on the local filesystem. + """ + + def __init__(self, profile: Profile) -> None: + # pylint: disable=super-init-not-called + filepath_database = Path(profile.storage_config['filepath']) / 'database.sqlite' + filepath_database.touch() + + self.profile = profile + self._engine = create_sqla_engine(filepath_database) + self._connection = None + + def get_container(self) -> Container: + """Return the disk-object store container. + + :returns: The disk-object store container configured for the repository path of the current profile. + """ + filepath_container = Path(self.profile.storage_config['filepath']) / 'container' + return Container(str(filepath_container)) + + def initialise_database(self) -> None: + """Initialise the database. + + This assumes that the database has no schema whatsoever and so the initial schema is created directly from the + models at the current head version without migrating through all of them one by one. 
+ """ + models.SqliteBase.metadata.create_all(self._engine) + + repository_uuid = self.get_repository_uuid() + + # Create a "sync" between the database and repository, by saving its UUID in the settings table + # this allows us to validate inconsistencies between the two + self.connection.execute( + insert(DbSetting).values(key=REPOSITORY_UUID_KEY, val=repository_uuid, description='Repository UUID') + ) + + # finally, generate the version table, "stamping" it with the most recent revision + with self._migration_context() as context: + context.stamp(context.script, 'main@head') # type: ignore[arg-type] + self.connection.commit() # pylint: disable=no-member + + +class SqliteDosStorage(PsqlDosBackend): + """A lightweight backend intended for demos and testing. + + This backend implementation uses an Sqlite database and a disk-objectstore container as the file repository. + """ + + migrator = SqliteDosMigrator + + class Configuration(BaseModel): + + filepath: str = Field( + title='Directory of the backend', + description='Filepath of the directory in which to store data for this backend.', + default_factory=mkdtemp + ) + + @classmethod + def initialise(cls, profile: Profile, reset: bool = False) -> bool: + filepath = Path(profile.storage_config['filepath']) + + try: + filepath.mkdir(parents=True, exist_ok=True) + except FileExistsError as exception: + raise ValueError( + f'`{filepath}` is a file and cannot be used for instance of `SqliteDosStorage`.' + ) from exception + + if list(filepath.iterdir()): + raise ValueError( + f'`{filepath}` already exists but is not empty and cannot be used for instance of `SqliteDosStorage`.' + ) + + return super().initialise(profile, reset) + + def __str__(self) -> str: + state = 'closed' if self.is_closed else 'open' + return f'SqliteDosStorage[{self._profile.storage_config["filepath"]}]: {state},' + + def _initialise_session(self): + """Initialise the SQLAlchemy session factory. + + Only one session factory is ever associated with a given class instance, + i.e. 
once the instance is closed, it cannot be reopened. + + The session factory, returns a session that is bound to the current thread. + Multi-thread support is currently required by the REST API. + Although, in the future, we may want to move the multi-thread handling to higher in the AiiDA stack. + """ + engine = create_sqla_engine(Path(self._profile.storage_config['filepath']) / 'database.sqlite') + self._session_factory = scoped_session(sessionmaker(bind=engine, future=True, expire_on_commit=True)) + + def get_repository(self) -> 'DiskObjectStoreRepositoryBackend': + from aiida.repository.backend import DiskObjectStoreRepositoryBackend + container = Container(str(Path(self.profile.storage_config['filepath']) / 'container')) + return DiskObjectStoreRepositoryBackend(container=container) + + @classmethod + def version_head(cls) -> str: + return get_schema_version_head() + + @classmethod + def version_profile(cls, profile: Profile) -> str | None: # pylint: disable=unused-argument + return get_schema_version_head() + + def query(self) -> orm.SqliteQueryBuilder: + return orm.SqliteQueryBuilder(self) + + def get_backend_entity(self, model) -> BackendEntity: + """Return the backend entity that corresponds to the given Model instance.""" + return orm.get_backend_entity(model, self) + + @cached_property + def authinfos(self): + return orm.SqliteAuthInfoCollection(self) + + @cached_property + def comments(self): + return orm.SqliteCommentCollection(self) + + @cached_property + def computers(self): + return orm.SqliteComputerCollection(self) + + @cached_property + def groups(self): + return orm.SqliteGroupCollection(self) + + @cached_property + def logs(self): + return orm.SqliteLogCollection(self) + + @cached_property + def nodes(self): + return orm.SqliteNodeCollection(self) + + @cached_property + def users(self): + return orm.SqliteUserCollection(self) diff --git a/docs/source/nitpick-exceptions b/docs/source/nitpick-exceptions index e7ceeee3e7..1c0be753a2 100644 --- 
a/docs/source/nitpick-exceptions +++ b/docs/source/nitpick-exceptions @@ -199,6 +199,7 @@ py:class Flask py:class pytest.tmpdir.TempPathFactory +py:class scoped_session py:class sqlalchemy.orm.decl_api.SqliteModel py:class sqlalchemy.orm.decl_api.Base py:class sqlalchemy.sql.compiler.TypeCompiler diff --git a/pyproject.toml b/pyproject.toml index 1ff3c06735..c4abf7d864 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -225,6 +225,7 @@ runaiida = "aiida.cmdline.commands.cmd_run:run" [project.entry-points."aiida.storage"] "core.psql_dos" = "aiida.storage.psql_dos.backend:PsqlDosBackend" +"core.sqlite_dos" = "aiida.storage.sqlite_dos.backend:SqliteDosStorage" "core.sqlite_temp" = "aiida.storage.sqlite_temp.backend:SqliteTempBackend" "core.sqlite_zip" = "aiida.storage.sqlite_zip.backend:SqliteZipBackend" diff --git a/tests/cmdline/commands/test_profile.py b/tests/cmdline/commands/test_profile.py index 2cd8201d6e..767fc137c6 100644 --- a/tests/cmdline/commands/test_profile.py +++ b/tests/cmdline/commands/test_profile.py @@ -133,10 +133,14 @@ def test_delete(run_cli_command, mock_profiles, pg_test_cluster): assert profile_list[3] not in result.output -def test_setup(run_cli_command, isolated_config, tmp_path): - """Test the ``verdi profile setup`` command.""" +@pytest.mark.parametrize('entry_point', ('core.sqlite_temp', 'core.sqlite_dos')) +def test_setup(run_cli_command, isolated_config, tmp_path, entry_point): + """Test the ``verdi profile setup`` command. + + Note that the options for user name and institution are not given on purpose. + """ profile_name = 'temp-profile' - options = ['core.sqlite_temp', '-n', '--filepath', str(tmp_path), '--profile', profile_name] + options = [entry_point, '-n', '--filepath', str(tmp_path), '--profile', profile_name, '--email', 'email@host'] result = run_cli_command(cmd_profile.profile_setup, options, use_subprocess=False) assert f'Created new profile `{profile_name}`.' 
in result.output assert profile_name in isolated_config.profile_names diff --git a/tests/manage/configuration/test_configuration.py b/tests/manage/configuration/test_configuration.py index e363bd9a60..3b7d93181a 100644 --- a/tests/manage/configuration/test_configuration.py +++ b/tests/manage/configuration/test_configuration.py @@ -5,15 +5,15 @@ import aiida from aiida.manage.configuration import Profile, create_profile, get_profile, profile_context from aiida.manage.manager import get_manager +from aiida.storage.sqlite_dos.backend import SqliteDosStorage from aiida.storage.sqlite_temp.backend import SqliteTempBackend -def test_create_profile(isolated_config, tmp_path): +@pytest.mark.parametrize('cls', (SqliteTempBackend, SqliteDosStorage)) +def test_create_profile(isolated_config, tmp_path, cls): """Test :func:`aiida.manage.configuration.tools.create_profile`.""" profile_name = 'testing' - profile = create_profile( - isolated_config, SqliteTempBackend, name=profile_name, email='test@localhost', filepath=str(tmp_path) - ) + profile = create_profile(isolated_config, cls, name=profile_name, email='test@localhost', filepath=str(tmp_path)) assert isinstance(profile, Profile) assert profile_name in isolated_config.profile_names From 527f515f3cb1db5242fe0473d0faf33a06530de3 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Fri, 7 Jul 2023 16:57:27 -0500 Subject: [PATCH 2/3] `SqliteZipBackend`: Return `self` in `store` The `store` method of the `SqliteEntityOverride` class, used by the `SqliteZipBackend` storage backend (and with that all other backends to subclass this), did not return `self`. This is in conflict with the signature of the base class that it is overriding. Since the `SqliteZipBackend` is read-only and so `store` would never be called, this problem went unnoticed. 
However, with the addition of the `SqliteDosStorage` backend which is *not* read-only, this bug would surface when trying to store a node since certain methods rely on this method returning the node instance itself. --- aiida/storage/sqlite_zip/orm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiida/storage/sqlite_zip/orm.py b/aiida/storage/sqlite_zip/orm.py index eaa1dab875..023421875e 100644 --- a/aiida/storage/sqlite_zip/orm.py +++ b/aiida/storage/sqlite_zip/orm.py @@ -70,7 +70,7 @@ def store(self, *args, **kwargs): backend = self._model._backend # pylint: disable=protected-access if getattr(backend, '_read_only', False): raise ReadOnlyError(f'Cannot store entity in read-only backend: {backend}') - super().store(*args, **kwargs) # type: ignore # pylint: disable=no-member + return super().store(*args, **kwargs) # type: ignore # pylint: disable=no-member class SqliteUser(SqliteEntityOverride, users.SqlaUser): From 808799516c1f30bee25d1fe3dcb90088d913a1c2 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Thu, 9 Nov 2023 16:05:57 +0100 Subject: [PATCH 3/3] Fix `QueryBuilder.count` for storage backends using sqlite The storage backends that use sqlite instead of PostgreSQL, i.e., `core.sqlite_dos`, `core.sqlite_temp` and `core.sqlite_zip`, piggyback off the ORM models defined by the `core.psql_dos` backend by dynamically converting to the sqlite equivalent database models. The current implementation of `SqlaGroup.count` would raise an exception when used with an sqlite backend since certain columns would be ambiguously defined: sqlite3.OperationalError: ambiguous column name: db_dbgroup.id This is fixed by explicitly wrapping the classes that are joined in `sqlalchemy.orm.aliased` which will force sqlalchemy to properly alias each class removing the ambiguity. 
--- aiida/storage/psql_dos/orm/groups.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/aiida/storage/psql_dos/orm/groups.py b/aiida/storage/psql_dos/orm/groups.py index d22d04538d..313262c8fc 100644 --- a/aiida/storage/psql_dos/orm/groups.py +++ b/aiida/storage/psql_dos/orm/groups.py @@ -116,8 +116,11 @@ def count(self): :return: integer number of entities contained within the group """ + from sqlalchemy.orm import aliased + group = aliased(self.MODEL_CLASS) + nodes = aliased(self.GROUP_NODE_CLASS) session = self.backend.get_session() - return session.query(self.MODEL_CLASS).join(self.MODEL_CLASS.dbnodes).filter(DbGroup.id == self.pk).count() + return session.query(group).join(nodes, nodes.dbgroup_id == group.id).filter(group.id == self.pk).count() def clear(self): """Remove all the nodes from this group."""