diff --git a/.clabot b/.clabot index 090bf33f5..e15890e57 100644 --- a/.clabot +++ b/.clabot @@ -1 +1 @@ -{ "contributors": [ "Ashish13898", "C-nit", "GitHub Action", "NicolaiRee", "O-Schilter", "YoelShoshan", "avaucher", "christofid", "donovat", "drugilsberg", "edux300", "elzinga-ibm-research", "federicozipoli", "georgosgeorgos", "helderlopes97", "helenaMontenegro", "hoffmansc", "jannisborn", "jrs021", "kishimoto1974", "kwehden", "mirunacrt", "polat-c", "yvesnana" ], "message": "Thanks a lot for working on GT4SD, we strongly value contributions from our users. It appears that one of the commiters in the PR did not sign the CLA for contributors. To do so, you can open an issue to sign the CLA clicking [here](https://github.com/GT4SD/gt4sd-core/issues/new?labels=cla-signing&template=cla-signature.yaml&title=CLA+signature). More details can be found [here](https://github.com/GT4SD/gt4sd-core/blob/main/CONTRIBUTING.md#contributor-license-agreement)." } +{ "contributors": [ "Ashish13898", "C-nit", "GitHub Action", "NicolaiRee", "O-Schilter", "YoelShoshan", "avaucher", "christofid", "donovat", "drugilsberg", "edux300", "elzinga-ibm-research", "federicozipoli", "fiskrt", "georgosgeorgos", "helderlopes97", "helenaMontenegro", "hoffmansc", "jannisborn", "jrs021", "kishimoto1974", "kwehden", "mirunacrt", "polat-c", "yvesnana" ], "message": "Thanks a lot for working on GT4SD, we strongly value contributions from our users. It appears that one of the commiters in the PR did not sign the CLA for contributors. To do so, you can open an issue to sign the CLA clicking [here](https://github.com/GT4SD/gt4sd-core/issues/new?labels=cla-signing&template=cla-signature.yaml&title=CLA+signature). More details can be found [here](https://github.com/GT4SD/gt4sd-core/blob/main/CONTRIBUTING.md#contributor-license-agreement)." } diff --git a/.github/workflows/cla-signature.yaml b/.github/workflows/cla-signature.yaml index 8d668ab8f..edfef7cf6 100644 --- a/.github/workflows/cla-signature.yaml +++ b/.github/workflows/cla-signature.yaml @@ -34,12 +34,12 @@ jobs: with: branch: main directory: gt4sd-core-main - github_token: ${{ secrets.GH_TOKEN }} + github_token: ${{ secrets.GITHUB_TOKEN }} - name: Mark signature in the issue uses: andymckay/labeler@e6c4322d0397f3240f0e7e30a33b5c5df2d39e90 with: add-labels: cla-signed - repo-token: ${{ secrets.GH_TOKEN }} + repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Close issue (existing contributor) uses: peter-evans/close-issue@v1 if: ${{ env.signing_commit_hash == '' }} diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index ca3c427f5..1f44d253e 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -27,7 +27,7 @@ jobs: conda activate gt4sd pip install --no-deps . # make sure we can build docs using specific dependencies - pip install "attrs==20.3.0" "markdown-it-py==0.6.2" + pip install "attrs==20.3.0" "markdown-it-py==1.0.0" cd docs && make html && cd .. - name: Commit documentation changes run: | diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml index bf5732936..16e988b2a 100644 --- a/.github/workflows/pypi.yaml +++ b/.github/workflows/pypi.yaml @@ -16,6 +16,7 @@ jobs: max-parallel: 3 matrix: python-version: + - "3.10" - 3.8 - 3.7 os: diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 21fcbf262..bce2076c9 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -19,6 +19,8 @@ jobs: defaults: run: shell: bash -l {0} # for conda command + env: + GIT_CLONE_PROTECTION_ACTIVE: false steps: - uses: actions/checkout@v2 - uses: conda-incubator/setup-miniconda@v2 @@ -48,7 +50,7 @@ jobs: - name: Check mypy run: | conda activate gt4sd - python -m mypy src/gt4sd + python -m mypy src/gt4sd --show-traceback - name: Run pytests run: | conda activate gt4sd diff --git a/README.md b/README.md index 579d6a9ee..db82c6002 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,10 @@ Almost all pretrained models are also available via `gradio`-powered [web apps]( Currently `gt4sd` relies on: -- python>=3.7,<3.9 -- pip>=19.1,<20.3 +- python>=3.7,<=3.10 +- pip==24.0 -We are actively working on relaxing these, so stay tuned or help us with this by [contributing](./CONTRIBUTING.md) to the project. +If you need others, help us by [contributing](./CONTRIBUTING.md) to the project. ### Conda diff --git a/conda_cpu_linux.yml b/conda_cpu_linux.yml index 2dd1b4a36..f38d42148 100644 --- a/conda_cpu_linux.yml +++ b/conda_cpu_linux.yml @@ -5,8 +5,9 @@ channels: - https://conda.anaconda.org/pyg - https://conda.anaconda.org/conda-forge dependencies: - - python>=3.7,<3.9 - - pip>=19.1,<20.3 + - setuptools==69.5.1 + - python>=3.7,<3.11 + - pip=24.0 - pytorch>=1.0,<=1.12.1 - cpuonly - pytorch-scatter<=2.0.9=*cu102* diff --git a/conda_cpu_mac.yml b/conda_cpu_mac.yml index e1fcb522a..84891748f 100644 --- a/conda_cpu_mac.yml +++ b/conda_cpu_mac.yml @@ -5,8 +5,9 @@ channels: - https://conda.anaconda.org/pyg - https://conda.anaconda.org/conda-forge dependencies: - - python>=3.7,<3.9 - - pip>=19.1,<20.3 + - setuptools==69.5.1 + - python>=3.7,<3.11 + - pip=24.0 - pytorch>=1.0,<=1.12.1 - cpuonly - pytorch-scatter<=2.0.9 diff --git a/conda_gpu.yml b/conda_gpu.yml index cd9d54981..eb603d3fd 100644 --- a/conda_gpu.yml +++ b/conda_gpu.yml @@ -5,8 +5,9 @@ channels: - https://conda.anaconda.org/pyg - https://conda.anaconda.org/conda-forge dependencies: - - python>=3.7,<3.9 - - pip>=19.1,<20.3 + - setuptools==69.5.1 + - python>=3.7,<3.11 + - pip=24.0 - pytorch>=1.0,<=1.12.1=*cu* - pytorch-scatter<=2.0.9=*cu102* - torchvision<=0.13.1=*cu* diff --git a/dev_requirements.txt b/dev_requirements.txt index 63105c76c..abd5ce319 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,4 +1,4 @@ -better-apidoc==0.3.1 +better-apidoc==0.3.2 black==22.3.0 docutils==0.17.1 flake8==3.8.4 @@ -6,11 +6,10 @@ flask==1.1.2 flask_login==0.5.0 # isort==5.7.0 licenseheaders==0.8.8 -mypy==0.950 -myst-parser==0.13.3 -pytest==6.1.1 +mypy>=1.0.0 +myst-parser==1.0.0 +pytest==6.2.5 pytest-cov==2.10.1 -sphinx==3.4.3 +sphinx>=5 sphinx-autodoc-typehints==1.11.1 jinja2<3.1.0 -sphinx_rtd_theme==0.5.1 \ No newline at end of file diff --git a/notebooks/discovery-demo.ipynb b/notebooks/discovery-demo.ipynb index 3453b832f..0677e9d3b 100644 --- a/notebooks/discovery-demo.ipynb +++ b/notebooks/discovery-demo.ipynb @@ -16,6 +16,16 @@ "id": "d6a65c61", "metadata": {}, "outputs": [], + "source": [ + "!pip install --upgrade protobuf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94cd4c8c", + "metadata": {}, + "outputs": [], "source": [ "from typing import List\n", "from rdkit import Chem\n", @@ -2325,7 +2335,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/requirements.txt b/requirements.txt index ba1ba67b6..30735523c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ # pypi requirements +setuptools==69.5.1 accelerate>=0.12,<0.20.0 datasets>=1.11.0 diffusers<=0.6.0 @@ -11,26 +12,28 @@ gt4sd-molformer>=0.1.1 gt4sd-trainer-hf-pl>=0.0.2 keras>=2.3.1,<2.11.0 keybert>=0.7.0 -markdown-it-py<3.0.0,>=2.2.0 +markdown-it-py minio==7.0.1 modlamp>=4.0.0 molgx>=0.22.0a1 nglview>=3.0.3 numpy>=1.16.5,<1.24.0 +pandas<=2.0.3 protobuf<3.20 pyarrow>=8.0.0 -pydantic>=1.7.3,<2.0.0 +pydantic>=2.0.0 pymatgen>=2022.11.7 PyTDC==0.3.7 pytorch_lightning<=1.7.7 pyyaml>=5.4.1 rdkit>=2022.3.5 +rdkit-stubs>=0.7 regex>=2.5.91 reinvent-chemistry==0.0.38 sacremoses>=0.0.41 scikit-learn>=1.0.0,<1.3.0 scikit-optimize>=0.8.1 -scipy>=1.0.0 +scipy>=1.0.0,<=1.11.0 sentencepiece>=0.1.95 sentence_transformers>1.0,<=2.2.2 sympy>=1.10.1 @@ -46,3 +49,5 @@ transformers>=4.22.0,<=4.24.0 typing_extensions>=3.7.4.3 wheel>=0.26 xgboost>=1.7.6 +sphinx_rtd_theme==0.5.1 +pydantic-settings>=2.0.0 diff --git a/setup.cfg b/setup.cfg index 20ea4625b..7bcdf6d75 100644 --- a/setup.cfg +++ b/setup.cfg @@ -98,6 +98,10 @@ ignore = E203, E501, W503 [mypy] check_untyped_defs = True plugins = pydantic.mypy +exclude = ^src/gt4sd/frameworks/gflownet/envs + +[mypy-gt4sd.frameworks.gflownet.envs.*] +follow_imports = skip [mypy-pytest.*] ignore_missing_imports = True @@ -105,6 +109,9 @@ ignore_missing_imports = True [mypy-rdkit.*] ignore_missing_imports = True +[mypy-rdkit-stubs.*] +ignore_errors = True + [mypy-setuptools.*] ignore_missing_imports = True @@ -141,7 +148,6 @@ ignore_missing_imports = True [mypy-transformers.*] ignore_missing_imports = True -# to avoid mypy from crashing (https://github.com/python/mypy/issues/11045) [mypy-transformers.trainer] check_untyped_defs = False @@ -284,4 +290,7 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-xgboost.*] +ignore_missing_imports = True + +[mypy-pydantic_settings.*] ignore_missing_imports = True \ No newline at end of file diff --git a/src/gt4sd/__init__.py b/src/gt4sd/__init__.py index 609eb33e1..bacfa5e8e 100644 --- a/src/gt4sd/__init__.py +++ b/src/gt4sd/__init__.py @@ -23,7 +23,7 @@ # """Module initialization.""" -__version__ = "1.4.1" +__version__ = "1.4.2" __name__ = "gt4sd" # NOTE: configure SSL to allow unverified contexts by default diff --git a/src/gt4sd/algorithms/conditional_generation/regression_transformer/implementation.py b/src/gt4sd/algorithms/conditional_generation/regression_transformer/implementation.py index 742f0cc4e..ec4025c68 100644 --- a/src/gt4sd/algorithms/conditional_generation/regression_transformer/implementation.py +++ b/src/gt4sd/algorithms/conditional_generation/regression_transformer/implementation.py @@ -1013,7 +1013,7 @@ def validate_output(self, sequences: List[Any]) -> Tuple[List[Any], List[int]]: if isinstance(self.tokenizer.text_tokenizer, PolymerGraphTokenizer): # Copolymer models require specific validation return validate_molecules( - pattern_list=list(zip(*sequences))[0], + pattern_list=list(zip(*sequences))[0], # type: ignore input_type=MoleculeFormat.copolymer, ) @@ -1086,9 +1086,11 @@ def filter_substructures( f"{keep} is not a valid SMILES/SELFIES. Instead substructure filtering " f"based on sequence alone can be done and is set to: {self.text_filtering}" ) - if keep not in self.substructures_to_keep and not Chem.MolFromSmiles( + if keep not in self.substructures_to_keep and not Chem.MolFromSmiles( # type: ignore self.target - ).HasSubstructMatch(subs_mol): + ).HasSubstructMatch( + subs_mol + ): logger.info( f"{keep} could not be identified in SMILES/SELFIES on text level AND no " "substructure match occurred, hence it will be ignored" @@ -1120,7 +1122,7 @@ def filter_substructures( sane = False break else: - if not mol.HasSubstructMatch(subs_mol): + if not mol.HasSubstructMatch(subs_mol): # type: ignore # Desired substructure not found sane = False break diff --git a/src/gt4sd/algorithms/conditional_generation/regression_transformer/utils.py b/src/gt4sd/algorithms/conditional_generation/regression_transformer/utils.py index f71e3fe79..d02766366 100644 --- a/src/gt4sd/algorithms/conditional_generation/regression_transformer/utils.py +++ b/src/gt4sd/algorithms/conditional_generation/regression_transformer/utils.py @@ -65,10 +65,10 @@ def filter_stubbed( Tuple of tuples of length 2 with filtered, generated molecule and its properties. """ - seed = Chem.MolFromSmiles(target) + seed = Chem.MolFromSmiles(target) # type: ignore - seed_atoms = len(list(seed.GetAtoms())) - seed_bonds = seed.GetNumBonds() + seed_atoms = len(list(seed.GetAtoms())) # type: ignore + seed_bonds = seed.GetNumBonds() # type: ignore smis: List[str] = [] props: List[str] = [] @@ -76,9 +76,9 @@ def filter_stubbed( if smi == "": continue try: - mol = Chem.MolFromSmiles(smi) - num_atoms = len(list(mol.GetAtoms())) - num_bonds = mol.GetNumBonds() + mol = Chem.MolFromSmiles(smi) # type: ignore + num_atoms = len(list(mol.GetAtoms())) # type: ignore + num_bonds = mol.GetNumBonds() # type: ignore if num_atoms > (threshold * seed_atoms) and num_bonds > ( threshold * seed_bonds diff --git a/src/gt4sd/algorithms/controlled_sampling/advanced_manufacturing/implementation/core.py b/src/gt4sd/algorithms/controlled_sampling/advanced_manufacturing/implementation/core.py index 09e264d6c..3f409e878 100644 --- a/src/gt4sd/algorithms/controlled_sampling/advanced_manufacturing/implementation/core.py +++ b/src/gt4sd/algorithms/controlled_sampling/advanced_manufacturing/implementation/core.py @@ -154,7 +154,7 @@ def __init__( targets: Dict[str, float], property_predictors: Dict[str, PropertyPredictor], representations: RepresentationsDict, - representation_order: List[str] = None, + representation_order: Optional[List[str]] = None, scalers: Optional[Dict[str, Scaler]] = None, weights: Optional[Dict[str, float]] = None, custom_score_function: Optional[ diff --git a/src/gt4sd/algorithms/core.py b/src/gt4sd/algorithms/core.py index 2c527c3f3..98321e299 100644 --- a/src/gt4sd/algorithms/core.py +++ b/src/gt4sd/algorithms/core.py @@ -25,7 +25,7 @@ from __future__ import annotations -import collections +from collections.abc import Hashable import logging import os import shutil @@ -233,7 +233,7 @@ def sample(self, number_of_items: int = 100) -> Iterator[S]: try: valid_item = self.configuration.validate_item(item) # check if sample is hashable - if not isinstance(item, collections.Hashable): + if not isinstance(item, Hashable): yield valid_item item_set.add(str(index)) else: @@ -623,9 +623,11 @@ def save_version_from_training_pipeline_arguments( target_version, ) filepaths_mapping = { - filename: source_filepath - if os.path.exists(source_filepath) - else os.path.join(source_missing_path, filename) + filename: ( + source_filepath + if os.path.exists(source_filepath) + else os.path.join(source_missing_path, filename) + ) for filename, source_filepath in filepaths_mapping.items() } logger.info(f"Saving artifacts into {target_path}...") @@ -713,9 +715,11 @@ def upload_version_from_training_pipeline_arguments( # mapping between filenames and paths for a version. filepaths_mapping = { - filename: source_filepath - if os.path.exists(source_filepath) - else os.path.join(source_missing_path, filename) + filename: ( + source_filepath + if os.path.exists(source_filepath) + else os.path.join(source_missing_path, filename) + ) for filename, source_filepath in filepaths_mapping.items() } @@ -908,8 +912,6 @@ def get_configuration_class_with_attributes( class PropertyPredictor(ABC, Generic[S, U]): - """TODO: Might be deprecated in future release.""" - def __init__(self, context: U) -> None: """Property predictor to investigate items. diff --git a/src/gt4sd/algorithms/generation/diffusion/geodiff/core.py b/src/gt4sd/algorithms/generation/diffusion/geodiff/core.py index d5fc64af6..41f16a81f 100644 --- a/src/gt4sd/algorithms/generation/diffusion/geodiff/core.py +++ b/src/gt4sd/algorithms/generation/diffusion/geodiff/core.py @@ -253,8 +253,8 @@ def visualize_2d_input(self, data: Data) -> None: molSize = (450, 300) drawer = MD2.MolDraw2DSVG(molSize[0], molSize[1]) drawer.DrawMolecule(mc) - drawer.FinishDrawing() - svg = drawer.GetDrawingText() + drawer.FinishDrawing() # type: ignore + svg = drawer.GetDrawingText() # type: ignore display(SVG(svg.replace("svg:", ""))) def visualize_3d(self, mols_gen: List[Chem.Mol]) -> None: diff --git a/src/gt4sd/algorithms/generation/diffusion/geodiff/model/core.py b/src/gt4sd/algorithms/generation/diffusion/geodiff/model/core.py index 93f2fabfb..f94463136 100644 --- a/src/gt4sd/algorithms/generation/diffusion/geodiff/model/core.py +++ b/src/gt4sd/algorithms/generation/diffusion/geodiff/model/core.py @@ -21,7 +21,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # -from typing import Any, Tuple, Union +from typing import Any, Tuple, Union, Optional import torch from diffusers.configuration_utils import ConfigMixin @@ -127,13 +127,13 @@ def _forward( bond_index: torch.Tensor, bond_type: torch.Tensor, batch: torch.Tensor, - edge_index: torch.Tensor = None, - edge_type: torch.Tensor = None, - edge_length: int = None, + edge_index: Optional[torch.Tensor] = None, + edge_type: Optional[torch.Tensor] = None, + edge_length: Optional[int] = None, return_edges: bool = False, extend_order: bool = True, extend_radius: bool = True, - is_sidechain: bool = None, + is_sidechain: Optional[bool] = None, ) -> Tuple[Any, ...]: """Forward pass for edges features. diff --git a/src/gt4sd/algorithms/generation/diffusion/implementation.py b/src/gt4sd/algorithms/generation/diffusion/implementation.py index 095315609..5a1d97c7c 100644 --- a/src/gt4sd/algorithms/generation/diffusion/implementation.py +++ b/src/gt4sd/algorithms/generation/diffusion/implementation.py @@ -63,7 +63,7 @@ def set_seed(seed: int = 42) -> None: """ np.random.seed(seed) torch.manual_seed(seed) - if torch.cuda.is_available: + if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) # type:ignore diff --git a/src/gt4sd/algorithms/generation/hugging_face/implementation.py b/src/gt4sd/algorithms/generation/hugging_face/implementation.py index 9a4688eff..aded43c49 100644 --- a/src/gt4sd/algorithms/generation/hugging_face/implementation.py +++ b/src/gt4sd/algorithms/generation/hugging_face/implementation.py @@ -79,7 +79,7 @@ def set_seed(seed: int = 42) -> None: """ np.random.seed(seed) torch.manual_seed(seed) - if torch.cuda.is_available: + if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) # type:ignore diff --git a/src/gt4sd/algorithms/registry.py b/src/gt4sd/algorithms/registry.py index e3af6c8b9..20927e1ad 100644 --- a/src/gt4sd/algorithms/registry.py +++ b/src/gt4sd/algorithms/registry.py @@ -28,7 +28,17 @@ from dataclasses import dataclass as vanilla_dataclass from dataclasses import field, make_dataclass from functools import WRAPPER_ASSIGNMENTS, update_wrapper -from typing import Any, Callable, ClassVar, Dict, List, NamedTuple, Optional, Type +from typing import ( + Any, + Callable, + ClassVar, + Dict, + List, + NamedTuple, + Optional, + Type, + TypeVar, +) import pydantic @@ -185,6 +195,8 @@ def decorator( ), ], # type: ignore ) + # NOTE: Needed to circumvent a pydantic TypeError: Parameter list to Generic[...] cannot be empty + VanillaConfiguration.__parameters__ = (TypeVar("T"),) # type: ignore # NOTE: Duplicate call necessary for pydantic >=1.10.* - see https://github.com/pydantic/pydantic/issues/4695 PydanticConfiguration: Type[AlgorithmConfiguration] = dataclass( # type: ignore VanillaConfiguration diff --git a/src/gt4sd/algorithms/tests/test_registry.py b/src/gt4sd/algorithms/tests/test_registry.py index 123a51241..286d99c33 100644 --- a/src/gt4sd/algorithms/tests/test_registry.py +++ b/src/gt4sd/algorithms/tests/test_registry.py @@ -58,19 +58,15 @@ def test_list_available_local_via_S3SyncError(mock_wrong_s3_env): def test_inherited_validation(): Config = next(iter(ApplicationsRegistry.applications.values())).configuration_class - with pytest.raises( - ValidationError, match="algorithm_version\n +none is not an allowed value" - ): + with pytest.raises(ValidationError, match="should be a valid string"): Config(algorithm_version=None) # type: ignore - # NOTE: values convertible to string will not raise! - Config(algorithm_version=5) # type: ignore + with pytest.raises(ValidationError, match="should be a valid string"): + Config(algorithm_version=5) # type: ignore def test_validation(): - with pytest.raises( - ValidationError, match="batch_size\n +value is not a valid integer" - ): + with pytest.raises(ValidationError, match="should be a valid integer"): ApplicationsRegistry.get_configuration_instance( algorithm_type="conditional_generation", domain="materials", @@ -80,25 +76,6 @@ def test_validation(): ) -def test_pickable_wrapped_configurations(): - # https://github.com/samuelcolvin/pydantic/issues/2111 - Config = next(iter(ApplicationsRegistry.applications.values())).configuration_class - restored_obj = assert_pickable(Config(algorithm_version="test")) - - # wrong type assignment, but we did not configure it to raise here: - restored_obj.algorithm_version = object - # ensure the restored dataclass is still a pydantic dataclass (mimic validation) - _, optional_errors = restored_obj.__pydantic_model__.__fields__.get( - "algorithm_version" - ).validate( - restored_obj.algorithm_version, - restored_obj.__dict__, - loc="algorithm_version", - cls=restored_obj.__class__, - ) - assert optional_errors is not None - - def test_multiple_registration(): class OtherAlgorithm(GeneratorAlgorithm): pass diff --git a/src/gt4sd/configuration.py b/src/gt4sd/configuration.py index b50876477..ac18e4a46 100644 --- a/src/gt4sd/configuration.py +++ b/src/gt4sd/configuration.py @@ -27,8 +27,7 @@ import os from functools import lru_cache from typing import Dict, Optional, Set - -from pydantic import BaseSettings +from pydantic_settings import BaseSettings, SettingsConfigDict from .s3 import GT4SDS3Client, S3SyncError, sync_folder_with_s3, upload_file_to_s3 @@ -65,10 +64,7 @@ class GT4SDConfiguration(BaseSettings): gt4sd_s3_secure_hub: bool = True gt4sd_s3_bucket_hub_algorithms: str = "gt4sd-cos-hub-algorithms-artifacts" gt4sd_s3_bucket_hub_properties: str = "gt4sd-cos-hub-properties-artifacts" - - class Config: - # immutable and in turn hashable, that is required for lru_cache - frozen = True + model_config = SettingsConfigDict(frozen=True) @staticmethod @lru_cache(maxsize=None) diff --git a/src/gt4sd/domains/materials/__init__.py b/src/gt4sd/domains/materials/__init__.py index b471b82f8..3836f89ae 100644 --- a/src/gt4sd/domains/materials/__init__.py +++ b/src/gt4sd/domains/materials/__init__.py @@ -71,7 +71,7 @@ def validate_smiles( for index, molecule in enumerate(molecules) if molecule is not None and molecule != "" ] - return molecules, valid_ids + return molecules, valid_ids # type: ignore def validate_selfies( diff --git a/src/gt4sd/frameworks/cgcnn/data.py b/src/gt4sd/frameworks/cgcnn/data.py index 5f468f4fe..b1d397a24 100644 --- a/src/gt4sd/frameworks/cgcnn/data.py +++ b/src/gt4sd/frameworks/cgcnn/data.py @@ -31,7 +31,7 @@ import logging import os import random -from typing import Any, Callable, List, Tuple, Union +from typing import Any, Callable, List, Tuple, Union, Optional import numpy as np import torch @@ -49,7 +49,7 @@ def get_train_val_test_loader( dataset: torch.utils.data.Dataset, collate_fn: Callable[[List[Any]], Any] = default_collate, batch_size: int = 64, - train_ratio: float = None, + train_ratio: Optional[float] = None, val_ratio: float = 0.1, test_ratio: float = 0.1, return_test: bool = False, @@ -212,7 +212,9 @@ class GaussianDistance: Unit: angstrom """ - def __init__(self, dmin: float, dmax: float, step: float, var: float = None): + def __init__( + self, dmin: float, dmax: float, step: float, var: Optional[float] = None + ): """ Args: dmin: float @@ -333,7 +335,7 @@ def __init__( dmin: int = 0, step: float = 0.2, random_seed: int = 123, - atom_initialization: AtomCustomJSONInitializer = None, + atom_initialization: Optional[AtomCustomJSONInitializer] = None, ): """ Args: diff --git a/src/gt4sd/frameworks/gflownet/dataloader/dataset.py b/src/gt4sd/frameworks/gflownet/dataloader/dataset.py index 1584075b6..78d95a3d9 100644 --- a/src/gt4sd/frameworks/gflownet/dataloader/dataset.py +++ b/src/gt4sd/frameworks/gflownet/dataloader/dataset.py @@ -24,7 +24,7 @@ import ast import os import pickle -from typing import Any, Callable, Dict, List, NewType, Tuple, Union +from typing import Any, Callable, Dict, List, NewType, Tuple, Union, Optional import numpy as np import pandas as pd @@ -50,7 +50,10 @@ class GFlowNetDataset(Dataset): """A dataset for gflownet.""" def __init__( - self, h5_file: str = None, target: str = "gap", properties: List[str] = [] + self, + h5_file: Optional[str] = None, + target: str = "gap", + properties: List[str] = [], ) -> None: """Initialize a gflownet dataset. @@ -239,8 +242,8 @@ def __init__( self, configuration: Dict[str, Any], dataset: GFlowNetDataset, - reward_model: nn.Module = None, - wrap_model: Callable[[nn.Module], nn.Module] = None, + reward_model: Optional[nn.Module] = None, + wrap_model: Optional[Callable[[nn.Module], nn.Module]] = None, ) -> None: """Initialize a generic gflownet task. @@ -282,7 +285,7 @@ def load_task_models(self) -> Dict[str, nn.Module]: Returns: model: a dictionary with the task models. """ - pass + raise NotImplementedError() def sample_conditional_information(self, n: int) -> Dict[str, Any]: """Samples conditional information for a minibatch. @@ -293,7 +296,7 @@ def sample_conditional_information(self, n: int) -> Dict[str, Any]: Returns: cond_info: a dictionary with the sampled conditional information. """ - pass + raise NotImplementedError() def cond_info_to_reward( self, cond_info: Dict[str, Any], flat_reward: FlatRewards diff --git a/src/gt4sd/frameworks/gflownet/envs/graph_building_env.py b/src/gt4sd/frameworks/gflownet/envs/graph_building_env.py index f77ca2710..de3eed10a 100644 --- a/src/gt4sd/frameworks/gflownet/envs/graph_building_env.py +++ b/src/gt4sd/frameworks/gflownet/envs/graph_building_env.py @@ -23,7 +23,7 @@ # import copy import enum -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Tuple, Union, Optional import networkx as nx import numpy as np @@ -131,11 +131,11 @@ class GraphAction: def __init__( self, action: GraphActionType, - source: int = None, - target: int = None, + source: Optional[int] = None, + target: Optional[int] = None, value: Any = None, - attr: str = None, - relabel: int = None, + attr: Optional[str] = None, + relabel: Optional[int] = None, ): """Initialize a single graph-building action. @@ -287,7 +287,7 @@ def count_backward_transitions(self, g: Graph) -> int: def generate_forward_trajectory( - g: Graph, max_nodes: int = None + g: Graph, max_nodes: Optional[int] = None ) -> List[Tuple[Graph, GraphAction]]: """Sample (uniformly) a trajectory that generates g. diff --git a/src/gt4sd/frameworks/gflownet/loss/trajectory_balance.py b/src/gt4sd/frameworks/gflownet/loss/trajectory_balance.py index 36d3f1611..fafff83cc 100644 --- a/src/gt4sd/frameworks/gflownet/loss/trajectory_balance.py +++ b/src/gt4sd/frameworks/gflownet/loss/trajectory_balance.py @@ -23,7 +23,7 @@ # import copy from itertools import count -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Tuple, Union, Optional import numpy as np import torch @@ -77,7 +77,7 @@ def __init__( configuration: Dict[str, Any], environment: GraphBuildingEnv, context: GraphBuildingEnvContext, - max_len: int = None, + max_len: Optional[int] = None, ): """Initialize trajectory balance algorithm. diff --git a/src/gt4sd/frameworks/gflownet/ml/models/mxmnet.py b/src/gt4sd/frameworks/gflownet/ml/models/mxmnet.py index fadc8b660..0813b4e29 100644 --- a/src/gt4sd/frameworks/gflownet/ml/models/mxmnet.py +++ b/src/gt4sd/frameworks/gflownet/ml/models/mxmnet.py @@ -873,8 +873,8 @@ def rdkit_conformation(mol, n=5, addHs=False): confs = AllChem.EmbedMultipleConfs(mol, numConfs=n, params=params) minc, aminc = 1000, 0 for i in range(len(confs)): - mp = AllChem.MMFFGetMoleculeProperties(mol, mmffVariant="MMFF94s") - ff = AllChem.MMFFGetMoleculeForceField(mol, mp, confId=i) + mp = AllChem.MMFFGetMoleculeProperties(mol, mmffVariant="MMFF94s") # type: ignore + ff = AllChem.MMFFGetMoleculeForceField(mol, mp, confId=i) # type: ignore if ff is None: continue e = ff.CalcEnergy() diff --git a/src/gt4sd/frameworks/gflownet/tests/qm9.py b/src/gt4sd/frameworks/gflownet/tests/qm9.py index 33f15cf7d..665616cd0 100644 --- a/src/gt4sd/frameworks/gflownet/tests/qm9.py +++ b/src/gt4sd/frameworks/gflownet/tests/qm9.py @@ -21,7 +21,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # -from typing import Any, Callable, Dict, List, Tuple, Union +from typing import Any, Callable, Dict, List, Tuple, Union, Optional import numpy as np import torch @@ -113,8 +113,8 @@ def __init__( self, configuration: Dict[str, Any], dataset: GFlowNetDataset, - reward_model: nn.Module = None, - wrap_model: Callable[[nn.Module], nn.Module] = None, + reward_model: Optional[nn.Module] = None, + wrap_model: Optional[Callable[[nn.Module], nn.Module]] = None, ): """Initialize QM9 task. diff --git a/src/gt4sd/properties/core.py b/src/gt4sd/properties/core.py index 821ebede0..7194c00bc 100644 --- a/src/gt4sd/properties/core.py +++ b/src/gt4sd/properties/core.py @@ -47,19 +47,21 @@ class S3Parameters(PropertyPredictorParameters): algorithm_type: str = "prediction" domain: DomainSubmodule = Field( - ..., example="molecules", description="Submodule of gt4sd.properties" + ..., examples=["molecules"], description="Submodule of gt4sd.properties" + ) + algorithm_name: str = Field( + ..., examples=["MCA"], description="Name of the algorithm" ) - algorithm_name: str = Field(..., example="MCA", description="Name of the algorithm") algorithm_version: str = Field( - ..., example="v0", description="Version of the algorithm" + ..., examples=["v0"], description="Version of the algorithm" ) - algorithm_application: str = Field(..., example="Tox21") + algorithm_application: str = Field(..., examples=["Tox21"]) class ApiTokenParameters(PropertyPredictorParameters): api_token: str = Field( ..., - example="apk-c9db......", + examples=["apk-c9db......"], description="The API token/key to access the service", ) @@ -68,7 +70,7 @@ class IpAdressParameters(PropertyPredictorParameters): host_ip: str = Field( ..., - example="xx.xx.xxx.xxx", + examples=["xx.xx.xxx.xxx"], description="The host IP address to access the service", ) diff --git a/src/gt4sd/properties/molecules/core.py b/src/gt4sd/properties/molecules/core.py index 07c51f1a3..83ec3dcab 100644 --- a/src/gt4sd/properties/molecules/core.py +++ b/src/gt4sd/properties/molecules/core.py @@ -59,7 +59,7 @@ from paccmann_generator.drug_evaluators import OrganDB as _OrganTox from paccmann_generator.drug_evaluators import SCScore from paccmann_generator.drug_evaluators import Tox21 as _Tox21 -from pydantic import Field +from pydantic import ConfigDict, Field from tdc import Oracle from tdc.metadata import download_receptor_oracle_name @@ -119,12 +119,12 @@ class ScscoreConfiguration(PropertyPredictorParameters): class SimilaritySeedParameters(PropertyPredictorParameters): - smiles: str = Field(..., example="c1ccccc1") + smiles: str = Field(..., examples=["c1ccccc1"]) fp_key: str = "ECFP4" class ActivityAgainstTargetParameters(PropertyPredictorParameters): - target: str = Field(..., example="drd2", description="name of the target.") + target: str = Field(..., examples=["drd2"], description="name of the target.") class AskcosParameters(IpAdressParameters): @@ -136,9 +136,8 @@ class Output(str, Enum): output: Output = Field( default=Output.plausability, - example=Output.synthesizability, + examples=[Output.synthesizability], description="Main output return type from ASKCOS", - options=["plausibility", "num_step", "synthesizability", "price"], ) save_json: bool = Field(default=False) file_name: str = Field(default="tree_builder_result.json") @@ -159,10 +158,7 @@ class Output(str, Enum): min_chempop_products: int = Field(default=5) filter_threshold: float = Field(default=0.1) return_first: str = Field(default="true") - - # Convert enum items back to strings - class Config: - use_enum_values = True + model_config = ConfigDict(use_enum_values=True) class MoleculeOneParameters(ApiTokenParameters): @@ -174,9 +170,8 @@ class DockingTdcParameters(PropertyPredictorParameters): # To dock against a receptor defined via TDC target: str = Field( ..., - example="1iep_docking", + examples=download_receptor_oracle_name, description="Target for docking, provided via TDC", - options=download_receptor_oracle_name, ) @@ -184,12 +179,14 @@ class DockingParameters(PropertyPredictorParameters): # To dock against a user-provided receptor name: str = Field(default="pyscreener") receptor_pdb_file: str = Field( - example="/tmp/2hbs.pdb", description="Path to receptor PDB file" + examples=["/tmp/2hbs.pdb"], description="Path to receptor PDB file" ) box_center: List[int] = Field( - example=[15.190, 53.903, 16.917], description="Docking box center" + examples=[[15.190, 53.903, 16.917]], description="Docking box center" + ) + box_size: List[float] = Field( + examples=[[20, 20, 20]], description="Docking box size" ) - box_size: List[float] = Field(example=[20, 20, 20], description="Docking box size") class S3ParametersMolecules(S3Parameters): @@ -265,14 +262,13 @@ class ToxType(str, Enum): algorithm_application: str = "OrganTox" site: Organs = Field( ..., - example=Organs.kidney, + examples=[Organs.kidney], description="name of the target site of interest.", ) toxicity_type: ToxType = Field( default=ToxType.all, - example=ToxType.chronic, + examples=[ToxType.chronic], description="type of toxicity for which predictions are made.", - options=["chronic", "subchronic", "multigenerational", "all"], ) diff --git a/src/gt4sd/properties/molecules/functions.py b/src/gt4sd/properties/molecules/functions.py index 0deee4d47..2c5d1d6ad 100644 --- a/src/gt4sd/properties/molecules/functions.py +++ b/src/gt4sd/properties/molecules/functions.py @@ -195,7 +195,7 @@ def number_of_rotatable_bonds(mol: SmallMolecule) -> int: def number_of_large_rings(mol: SmallMolecule) -> int: """Calculate the amount of large rings (> 6 atoms) of a molecule.""" mol = to_mol(mol) - ringinfo = mol.GetRingInfo() + ringinfo = mol.GetRingInfo() # type: ignore return len([x for x in ringinfo.AtomRings() if len(x) > 6]) diff --git a/src/gt4sd/properties/proteins/core.py b/src/gt4sd/properties/proteins/core.py index 29859f045..40aeae567 100644 --- a/src/gt4sd/properties/proteins/core.py +++ b/src/gt4sd/properties/proteins/core.py @@ -46,7 +46,7 @@ class AmideConfiguration(PropertyPredictorParameters): amide: bool = Field( False, - example=False, + examples=[False], description="whether the sequences are C-terminally amidated.", ) @@ -58,7 +58,7 @@ class PhConfiguration(PropertyPredictorParameters): class AmidePhConfiguration(PropertyPredictorParameters): amide: bool = Field( False, - example=False, + examples=[False], description="whether the sequences are C-terminally amidated.", ) ph: float = 7.0 diff --git a/src/gt4sd/properties/scores/core.py b/src/gt4sd/properties/scores/core.py index ddd9a2372..4bfb163b0 100644 --- a/src/gt4sd/properties/scores/core.py +++ b/src/gt4sd/properties/scores/core.py @@ -23,7 +23,7 @@ # """Implementation of scorers.""" from functools import partial -from typing import Any, Callable, Dict, List, Type +from typing import Any, Callable, Dict, List, Type, Optional import numpy as np from rdkit import Chem @@ -151,7 +151,7 @@ class CombinedScorer: def __init__( self, scorer_list: List[Type[Any]], - weights: List[float] = None, + weights: Optional[List[float]] = None, ) -> None: """Scoring function which generates a combined score for a SMILES as per the given scoring functions. @@ -235,7 +235,7 @@ def score(self, smiles: str) -> float: descriptor=self.descriptor, score_modifier=self.modifier, ) - return scoring_function.score_mol(Chem.MolFromSmiles(smiles)) + return scoring_function.score_mol(Chem.MolFromSmiles(smiles)) # type: ignore class TanimotoScorer(TargetValueScorer): @@ -274,7 +274,7 @@ def score(self, smiles: str) -> float: fp_type=self.fp_type, score_modifier=self.modifier, ) - return scoring_function.score_mol(Chem.MolFromSmiles(smiles)) + return scoring_function.score_mol(Chem.MolFromSmiles(smiles)) # type: ignore class IsomerScorer(TargetValueScorer): @@ -326,7 +326,7 @@ def score(self, smiles: str) -> float: A score for the given SMILES """ scoring_function = SMARTSScoringFunction(self.target_smile, self.inverse) - return scoring_function.score_mol(Chem.MolFromSmiles(smiles)) + return scoring_function.score_mol(Chem.MolFromSmiles(smiles)) # type: ignore class QEDScorer(TargetValueScorer): diff --git a/src/gt4sd/properties/utils.py b/src/gt4sd/properties/utils.py index 130215d3a..a6a5286b3 100644 --- a/src/gt4sd/properties/utils.py +++ b/src/gt4sd/properties/utils.py @@ -49,14 +49,14 @@ def to_mol(mol: SmallMolecule) -> Chem.Mol: a rdkit.Chem.Mol object. """ if isinstance(mol, str): - mol = Chem.MolFromSmiles(mol) + mol = Chem.MolFromSmiles(mol) # type: ignore elif isinstance(mol, Chem.Mol): pass else: raise TypeError( f"Please provide SMILES string or rdkit.Chem.Mol object not {type(mol)}" ) - return mol + return mol # type: ignore def to_smiles(mol: SmallMolecule) -> str: @@ -70,7 +70,7 @@ def to_smiles(mol: SmallMolecule) -> str: """ if isinstance(mol, str): try: - mol = Chem.MolFromSmiles(mol) + mol = Chem.MolFromSmiles(mol) # type: ignore except Exception: raise ValueError( f"Could not convert SMILES string to rdkit.Chem.Mol: {mol}" @@ -82,7 +82,7 @@ def to_smiles(mol: SmallMolecule) -> str: f"Pass a SMILES string or rdkit.Chem.Mol object not {type(mol)}" ) - return Chem.MolToSmiles(mol, canonical=True) + return Chem.MolToSmiles(mol, canonical=True) # type: ignore def get_similarity_fn( diff --git a/src/gt4sd/tests/utils.py b/src/gt4sd/tests/utils.py index ff1ddf877..8ee88bc0f 100644 --- a/src/gt4sd/tests/utils.py +++ b/src/gt4sd/tests/utils.py @@ -29,7 +29,7 @@ from pathlib import PosixPath import importlib_resources -from pydantic import BaseSettings +from pydantic_settings import BaseSettings, SettingsConfigDict class GT4SDTestSettings(BaseSettings): @@ -40,10 +40,7 @@ class GT4SDTestSettings(BaseSettings): gt4sd_s3_secret_key: str = "5748375c761a4f09c30a68cd15e218e3b27ca3e2aebd7726" gt4sd_s3_secure: bool = True gt4sd_ci: bool = False - - class Config: - # immutable and in turn hashable, that is required for lru_cache - frozen = True + model_config = SettingsConfigDict(frozen=True) @staticmethod @lru_cache(maxsize=None)