diff --git a/antarest/matrixstore/repository.py b/antarest/matrixstore/repository.py index f96e80de67..6301e39c7f 100644 --- a/antarest/matrixstore/repository.py +++ b/antarest/matrixstore/repository.py @@ -1,7 +1,7 @@ import hashlib import logging +import typing as t from pathlib import Path -from typing import List, Optional, Union import numpy as np from filelock import FileLock @@ -31,19 +31,19 @@ def save(self, matrix_user_metadata: MatrixDataSet) -> MatrixDataSet: logger.debug(f"Matrix dataset {matrix_user_metadata.id} for user {matrix_user_metadata.owner_id} saved") return matrix_user_metadata - def get(self, id: str) -> Optional[MatrixDataSet]: + def get(self, id: str) -> t.Optional[MatrixDataSet]: matrix: MatrixDataSet = db.session.query(MatrixDataSet).get(id) return matrix - def get_all_datasets(self) -> List[MatrixDataSet]: - matrix_datasets: List[MatrixDataSet] = db.session.query(MatrixDataSet).all() + def get_all_datasets(self) -> t.List[MatrixDataSet]: + matrix_datasets: t.List[MatrixDataSet] = db.session.query(MatrixDataSet).all() return matrix_datasets def query( self, - name: Optional[str], - owner: Optional[int] = None, - ) -> List[MatrixDataSet]: + name: t.Optional[str], + owner: t.Optional[int] = None, + ) -> t.List[MatrixDataSet]: """ Query a list of MatrixUserMetadata by searching for each one separately if a set of filter match @@ -59,7 +59,7 @@ def query( query = query.filter(MatrixDataSet.name.ilike(f"%{name}%")) # type: ignore if owner is not None: query = query.filter(MatrixDataSet.owner_id == owner) - datasets: List[MatrixDataSet] = query.distinct().all() + datasets: t.List[MatrixDataSet] = query.distinct().all() return datasets def delete(self, dataset_id: str) -> None: @@ -83,7 +83,7 @@ def save(self, matrix: Matrix) -> Matrix: logger.debug(f"Matrix {matrix.id} saved") return matrix - def get(self, matrix_hash: str) -> Optional[Matrix]: + def get(self, matrix_hash: str) -> t.Optional[Matrix]: matrix: Matrix = 
db.session.query(Matrix).get(matrix_hash) return matrix @@ -130,6 +130,7 @@ def get(self, matrix_hash: str) -> MatrixContent: matrix_file = self.bucket_dir.joinpath(f"{matrix_hash}.tsv") matrix = np.loadtxt(matrix_file, delimiter="\t", dtype=np.float64, ndmin=2) + matrix = matrix.reshape((1, 0)) if matrix.size == 0 else matrix data = matrix.tolist() index = list(range(matrix.shape[0])) columns = list(range(matrix.shape[1])) @@ -148,7 +149,7 @@ def exists(self, matrix_hash: str) -> bool: matrix_file = self.bucket_dir.joinpath(f"{matrix_hash}.tsv") return matrix_file.exists() - def save(self, content: Union[List[List[MatrixData]], npt.NDArray[np.float64]]) -> str: + def save(self, content: t.Union[t.List[t.List[MatrixData]], npt.NDArray[np.float64]]) -> str: """ Saves the content of a matrix as a TSV file in the bucket directory and returns its SHA256 hash. @@ -188,8 +189,12 @@ def save(self, content: Union[List[List[MatrixData]], npt.NDArray[np.float64]]) # Ensure exclusive access to the matrix file between multiple processes (or threads). lock_file = matrix_file.with_suffix(".tsv.lock") with FileLock(lock_file, timeout=15): - # noinspection PyTypeChecker - np.savetxt(matrix_file, matrix, delimiter="\t", fmt="%.18f") + if matrix.size == 0: + # If the array or dataframe is empty, create an empty file instead of + # traditional saving to avoid unwanted line breaks. + open(matrix_file, mode="wb").close() + else: + np.savetxt(matrix_file, matrix, delimiter="\t", fmt="%.18f") # IMPORTANT: Deleting the lock file under Linux can make locking unreliable. 
# See https://github.com/tox-dev/py-filelock/issues/31 diff --git a/antarest/matrixstore/service.py b/antarest/matrixstore/service.py index c20b0197dc..639084b587 100644 --- a/antarest/matrixstore/service.py +++ b/antarest/matrixstore/service.py @@ -1,12 +1,13 @@ import contextlib +import io +import json import logging import tempfile +import zipfile from abc import ABC, abstractmethod from datetime import datetime, timezone -from io import BytesIO from pathlib import Path from typing import List, Optional, Sequence, Tuple, Union -from zipfile import ZipFile import numpy as np from fastapi import UploadFile @@ -36,6 +37,18 @@ ) from antarest.matrixstore.repository import MatrixContentRepository, MatrixDataSetRepository, MatrixRepository +# List of files to exclude from ZIP archives +EXCLUDED_FILES = { + "__MACOSX", + ".DS_Store", + "._.DS_Store", + "Thumbs.db", + "desktop.ini", + "$RECYCLE.BIN", + "System Volume Information", + "RECYCLER", +} + logger = logging.getLogger(__name__) @@ -150,29 +163,42 @@ def create(self, data: Union[List[List[MatrixData]], npt.NDArray[np.float64]]) - self.repo.save(matrix) return matrix_id - def create_by_importation(self, file: UploadFile, json: bool = False) -> List[MatrixInfoDTO]: + def create_by_importation(self, file: UploadFile, is_json: bool = False) -> List[MatrixInfoDTO]: + """ + Imports a matrix from a TSV or JSON file or a collection of matrices from a ZIP file. + + TSV-formatted files are expected to contain only matrix data without any header. + + JSON-formatted files are expected to contain the following attributes: + + - `index`: The list of row labels. + - `columns`: The list of column labels. + - `data`: The matrix data as a nested list of floats. + + Args: + file: The file to import (TSV, JSON or ZIP). + is_json: Flag indicating if the file is JSON-encoded. + + Returns: + A list of `MatrixInfoDTO` objects containing the SHA256 hash of the imported matrices. 
+ """ with file.file as f: if file.content_type == "application/zip": - input_zip = ZipFile(BytesIO(f.read())) - files = { - info.filename: input_zip.read(info.filename) for info in input_zip.infolist() if not info.is_dir() - } + with contextlib.closing(f): + buffer = io.BytesIO(f.read()) matrix_info: List[MatrixInfoDTO] = [] - for name in files: - if all( - [ - not name.startswith("__MACOSX/"), - not name.startswith(".DS_Store"), - ] - ): - matrix_id = self._file_importation(files[name], json) - matrix_info.append(MatrixInfoDTO(id=matrix_id, name=name)) + with zipfile.ZipFile(buffer) as zf: + for info in zf.infolist(): + if info.is_dir() or not EXCLUDED_FILES.isdisjoint(info.filename.split("/")): + continue + matrix_id = self._file_importation(zf.read(info.filename), is_json=is_json) + matrix_info.append(MatrixInfoDTO(id=matrix_id, name=info.filename)) return matrix_info else: - matrix_id = self._file_importation(f.read(), json) + matrix_id = self._file_importation(f.read(), is_json=is_json) return [MatrixInfoDTO(id=matrix_id, name=file.filename)] - def _file_importation(self, file: bytes, is_json: bool = False) -> str: + def _file_importation(self, file: bytes, *, is_json: bool = False) -> str: """ Imports a matrix from a TSV or JSON file in bytes format. @@ -184,9 +210,12 @@ def _file_importation(self, file: bytes, is_json: bool = False) -> str: A SHA256 hash that identifies the imported matrix. 
""" if is_json: - return self.create(MatrixContent.parse_raw(file).data) + obj = json.loads(file) + content = MatrixContent(**obj) + return self.create(content.data) # noinspection PyTypeChecker - matrix = np.loadtxt(BytesIO(file), delimiter="\t", dtype=np.float64, ndmin=2) + matrix = np.loadtxt(io.BytesIO(file), delimiter="\t", dtype=np.float64, ndmin=2) + matrix = matrix.reshape((1, 0)) if matrix.size == 0 else matrix return self.create(matrix) def get_dataset( @@ -380,8 +409,13 @@ def create_matrix_files(self, matrix_ids: Sequence[str], export_path: Path) -> s name = f"matrix-{mtx.id}.txt" filepath = f"{tmpdir}/{name}" array = np.array(mtx.data, dtype=np.float64) - # noinspection PyTypeChecker - np.savetxt(filepath, array, delimiter="\t", fmt="%.18f") + if array.size == 0: + # If the array or dataframe is empty, create an empty file instead of + # traditional saving to avoid unwanted line breaks. + open(filepath, mode="wb").close() + else: + # noinspection PyTypeChecker + np.savetxt(filepath, array, delimiter="\t", fmt="%.18f") zip_dir(Path(tmpdir), export_path) stopwatch.log_elapsed(lambda x: logger.info(f"Matrix dataset exported (zipped mode) in {x}s")) return str(export_path) @@ -467,5 +501,10 @@ def download_matrix( raise UserHasNotPermissionError() if matrix := self.get(matrix_id): array = np.array(matrix.data, dtype=np.float64) - # noinspection PyTypeChecker - np.savetxt(filepath, array, delimiter="\t", fmt="%.18f") + if array.size == 0: + # If the array or dataframe is empty, create an empty file instead of + # traditional saving to avoid unwanted line breaks. 
+ open(filepath, mode="wb").close() + else: + # noinspection PyTypeChecker + np.savetxt(filepath, array, delimiter="\t", fmt="%.18f") diff --git a/antarest/matrixstore/uri_resolver_service.py b/antarest/matrixstore/uri_resolver_service.py index a13d6c6ad0..01717c57bd 100644 --- a/antarest/matrixstore/uri_resolver_service.py +++ b/antarest/matrixstore/uri_resolver_service.py @@ -11,7 +11,7 @@ class UriResolverService: def __init__(self, matrix_service: ISimpleMatrixService): self.matrix_service = matrix_service - def resolve(self, uri: str, formatted: bool = True) -> Optional[SUB_JSON]: + def resolve(self, uri: str, formatted: bool = True) -> SUB_JSON: res = UriResolverService._extract_uri_components(uri) if res: protocol, uuid = res @@ -52,19 +52,17 @@ def _resolve_matrix(self, id: str, formatted: bool = True) -> SUB_JSON: index=data.index, columns=data.columns, ) - if not df.empty: - return ( - df.to_csv( - None, - sep="\t", - header=False, - index=False, - float_format="%.6f", - ) - or "" - ) - else: + if df.empty: return "" + else: + csv = df.to_csv( + None, + sep="\t", + header=False, + index=False, + float_format="%.6f", + ) + return csv or "" raise ValueError(f"id matrix {id} not found") def build_matrix_uri(self, id: str) -> str: diff --git a/antarest/matrixstore/web.py b/antarest/matrixstore/web.py index a97ae7d45b..4b47135b52 100644 --- a/antarest/matrixstore/web.py +++ b/antarest/matrixstore/web.py @@ -55,7 +55,7 @@ def create_by_importation( ) -> Any: logger.info("Importing new matrix dataset", extra={"user": current_user.id}) if current_user.id is not None: - return service.create_by_importation(file, json) + return service.create_by_importation(file, is_json=json) raise UserHasNotPermissionError() @bp.get("/matrix/{id}", tags=[APITag.matrix], response_model=MatrixDTO) diff --git a/antarest/study/service.py b/antarest/study/service.py index 332fc384da..b9ec491c18 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -1378,6 +1378,7 
@@ def _create_edit_study_command( if isinstance(data, bytes): # noinspection PyTypeChecker matrix = np.loadtxt(io.BytesIO(data), delimiter="\t", dtype=np.float64, ndmin=2) + matrix = matrix.reshape((1, 0)) if matrix.size == 0 else matrix return ReplaceMatrix( target=url, matrix=matrix.tolist(), diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py index 13ea8e0400..6f82eaab34 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py @@ -98,15 +98,16 @@ def _dump_json(self, data: JSON) -> None: matrix = pd.concat([time, matrix], axis=1) head = self.head_writer.build(var=df.columns.size, end=df.index.size) - self.config.path.write_text(head) - - matrix.to_csv( - open(self.config.path, "a", newline="\n"), - sep="\t", - index=False, - header=False, - line_terminator="\n", - ) + with self.config.path.open(mode="w", newline="\n") as fd: + fd.write(head) + if not matrix.empty: + matrix.to_csv( + fd, + sep="\t", + header=False, + index=False, + float_format="%.6f", + ) def check_errors( self, diff --git a/antarest/tools/lib.py b/antarest/tools/lib.py index b27e4dcee4..2d2953e3f5 100644 --- a/antarest/tools/lib.py +++ b/antarest/tools/lib.py @@ -77,6 +77,7 @@ def apply_commands( matrix_dataset: List[str] = [] for matrix_file in matrices_dir.iterdir(): matrix = np.loadtxt(matrix_file, delimiter="\t", dtype=np.float64, ndmin=2) + matrix = matrix.reshape((1, 0)) if matrix.size == 0 else matrix matrix_data = matrix.tolist() res = self.session.post(self.build_url("/v1/matrix"), json=matrix_data) res.raise_for_status() diff --git a/tests/matrixstore/test_repository.py b/tests/matrixstore/test_repository.py index 9d1254953a..3973a18d39 100644 --- a/tests/matrixstore/test_repository.py +++ b/tests/matrixstore/test_repository.py @@ -1,9 +1,10 
@@ +import typing as t from datetime import datetime from pathlib import Path -from typing import Optional import numpy as np import pytest +from numpy import typing as npt from antarest.core.config import Config, SecurityConfig from antarest.core.utils.fastapi_sqlalchemy import db @@ -12,16 +13,15 @@ from antarest.matrixstore.model import Matrix, MatrixContent, MatrixDataSet, MatrixDataSetRelation from antarest.matrixstore.repository import MatrixContentRepository, MatrixDataSetRepository, MatrixRepository +ArrayData = t.Union[t.List[t.List[float]], npt.NDArray[np.float64]] + class TestMatrixRepository: - def test_db_lifecycle(self): + def test_db_lifecycle(self) -> None: with db(): # sourcery skip: extract-method repo = MatrixRepository() - m = Matrix( - id="hello", - created_at=datetime.now(), - ) + m = Matrix(id="hello", created_at=datetime.now()) repo.save(m) assert m.id assert m == repo.get(m.id) @@ -29,11 +29,11 @@ def test_db_lifecycle(self): repo.delete(m.id) assert repo.get(m.id) is None - def test_bucket_lifecycle(self, tmp_path: Path): + def test_bucket_lifecycle(self, tmp_path: Path) -> None: repo = MatrixContentRepository(tmp_path) - a = [[1, 2], [3, 4]] - b = [[5, 6], [7, 8]] + a: ArrayData = [[1, 2], [3, 4]] + b: ArrayData = [[5, 6], [7, 8]] matrix_content_a = MatrixContent(data=a, index=[0, 1], columns=[0, 1]) matrix_content_b = MatrixContent(data=b, index=[0, 1], columns=[0, 1]) @@ -51,7 +51,7 @@ def test_bucket_lifecycle(self, tmp_path: Path): with pytest.raises(FileNotFoundError): repo.get(aid) - def test_dataset(self): + def test_dataset(self) -> None: with db(): # sourcery skip: extract-duplicate-method, extract-method repo = MatrixRepository() @@ -66,15 +66,9 @@ def test_dataset(self): dataset_repo = MatrixDataSetRepository() - m1 = Matrix( - id="hello", - created_at=datetime.now(), - ) + m1 = Matrix(id="hello", created_at=datetime.now()) repo.save(m1) - m2 = Matrix( - id="world", - created_at=datetime.now(), - ) + m2 = Matrix(id="world", 
created_at=datetime.now()) repo.save(m2) dataset = MatrixDataSet( @@ -94,7 +88,7 @@ def test_dataset(self): dataset.matrices.append(matrix_relation) dataset = dataset_repo.save(dataset) - dataset_query_result: Optional[MatrixDataSet] = dataset_repo.get(dataset.id) + dataset_query_result = dataset_repo.get(dataset.id) assert dataset_query_result is not None assert dataset_query_result.name == "some name" assert len(dataset_query_result.matrices) == 2 @@ -106,12 +100,12 @@ def test_dataset(self): updated_at=datetime.now(), ) dataset_repo.save(dataset_update) - dataset_query_result: Optional[MatrixDataSet] = dataset_repo.get(dataset.id) + dataset_query_result = dataset_repo.get(dataset.id) assert dataset_query_result is not None assert dataset_query_result.name == "some name change" assert dataset_query_result.owner_id == user.id - def test_datastore_query(self): + def test_datastore_query(self) -> None: # sourcery skip: extract-duplicate-method with db(): user_repo = UserRepository(Config(security=SecurityConfig())) @@ -121,15 +115,9 @@ def test_datastore_query(self): user2 = user_repo.save(User(name="hello", password=Password("world"))) repo = MatrixRepository() - m1 = Matrix( - id="hello", - created_at=datetime.now(), - ) + m1 = Matrix(id="hello", created_at=datetime.now()) repo.save(m1) - m2 = Matrix( - id="world", - created_at=datetime.now(), - ) + m2 = Matrix(id="world", created_at=datetime.now()) repo.save(m2) dataset_repo = MatrixDataSetRepository() @@ -176,14 +164,19 @@ def test_datastore_query(self): assert repo.get(m1.id) is not None assert ( len( - db.session.query(MatrixDataSetRelation).filter(MatrixDataSetRelation.dataset_id == dataset.id).all() + # fmt: off + db.session + .query(MatrixDataSetRelation) + .filter(MatrixDataSetRelation.dataset_id == dataset.id) + .all() + # fmt: on ) == 0 ) class TestMatrixContentRepository: - def test_save(self, matrix_content_repo: MatrixContentRepository): + def test_save(self, matrix_content_repo: 
MatrixContentRepository) -> None: """ Saves the content of a matrix as a TSV file in the directory and returns its SHA256 hash. @@ -192,6 +185,7 @@ def test_save(self, matrix_content_repo: MatrixContentRepository): bucket_dir = matrix_content_repo.bucket_dir # when the data is saved in the repo + data: ArrayData data = [[1, 2, 3], [4, 5, 6]] matrix_hash = matrix_content_repo.save(data) # then a TSV file is created in the repo directory @@ -224,12 +218,37 @@ def test_save(self, matrix_content_repo: MatrixContentRepository): other_matrix_file = bucket_dir.joinpath(f"{other_matrix_hash}.tsv") assert set(matrix_files) == {matrix_file, other_matrix_file} - def test_get(self, matrix_content_repo): + def test_save_and_retrieve_empty_matrix(self, matrix_content_repo: MatrixContentRepository) -> None: + """ + Test saving and retrieving empty matrices as TSV files. + In all cases the file must be empty. + """ + bucket_dir = matrix_content_repo.bucket_dir + + # Test with an empty matrix + empty_array: ArrayData = [] + matrix_hash = matrix_content_repo.save(empty_array) + matrix_file = bucket_dir.joinpath(f"{matrix_hash}.tsv") + retrieved_matrix = matrix_content_repo.get(matrix_hash) + + assert not matrix_file.read_bytes() + assert retrieved_matrix.data == [[]] + + # Test with an empty 2D array + empty_2d_array: ArrayData = [[]] + matrix_hash = matrix_content_repo.save(empty_2d_array) + matrix_file = bucket_dir.joinpath(f"{matrix_hash}.tsv") + retrieved_matrix = matrix_content_repo.get(matrix_hash) + + assert not matrix_file.read_bytes() + assert retrieved_matrix.data == [[]] + + def test_get(self, matrix_content_repo: MatrixContentRepository) -> None: """ Retrieves the content of a matrix with a given SHA256 hash. 
""" # when the data is saved in the repo - data = [[1, 2, 3], [4, 5, 6]] + data: ArrayData = [[1, 2, 3], [4, 5, 6]] matrix_hash = matrix_content_repo.save(data) # then the saved matrix object can be retrieved content = matrix_content_repo.get(matrix_hash) @@ -243,12 +262,12 @@ def test_get(self, matrix_content_repo): missing_hash = "8b1a9953c4611296a827abf8c47804d7e6c49c6b" matrix_content_repo.get(missing_hash) - def test_exists(self, matrix_content_repo): + def test_exists(self, matrix_content_repo: MatrixContentRepository) -> None: """ Checks if a matrix with a given SHA256 hash exists in the directory. """ # when the data is saved in the repo - data = [[1, 2, 3], [4, 5, 6]] + data: ArrayData = [[1, 2, 3], [4, 5, 6]] matrix_hash = matrix_content_repo.save(data) # then the saved matrix object exists assert matrix_content_repo.exists(matrix_hash) @@ -258,12 +277,12 @@ def test_exists(self, matrix_content_repo): missing_hash = "8b1a9953c4611296a827abf8c47804d7e6c49c6b" assert not matrix_content_repo.exists(missing_hash) - def test_delete(self, matrix_content_repo): + def test_delete(self, matrix_content_repo: MatrixContentRepository) -> None: """ Deletes the tsv file containing the content of a matrix with a given SHA256 hash. 
""" # when the data is saved in the repo - data = [[1, 2, 3], [4, 5, 6]] + data: ArrayData = [[1, 2, 3], [4, 5, 6]] matrix_hash = matrix_content_repo.save(data) # then the saved matrix object can be deleted matrix_content_repo.delete(matrix_hash) diff --git a/tests/matrixstore/test_service.py b/tests/matrixstore/test_service.py index a251cda4c5..db26e6403a 100644 --- a/tests/matrixstore/test_service.py +++ b/tests/matrixstore/test_service.py @@ -1,9 +1,10 @@ import datetime import io +import json import time import typing as t +import zipfile from unittest.mock import ANY, Mock -from zipfile import ZIP_DEFLATED, ZipFile import numpy as np import pytest @@ -26,12 +27,14 @@ ) from antarest.matrixstore.service import MatrixService +MatrixType = t.List[t.List[float]] + class TestMatrixService: - def test_create__nominal_case(self, matrix_service: MatrixService): + def test_create__nominal_case(self, matrix_service: MatrixService) -> None: """Creates a new matrix object with the specified data.""" # when a matrix is created (inserted) in the service - data = [[1, 2, 3], [4, 5, 6]] + data: MatrixType = [[1, 2, 3], [4, 5, 6]] matrix_id = matrix_service.create(data) # A "real" hash value is calculated @@ -52,7 +55,7 @@ def test_create__nominal_case(self, matrix_service: MatrixService): now = datetime.datetime.utcnow() assert now - datetime.timedelta(seconds=1) <= obj.created_at <= now - def test_create__from_numpy_array(self, matrix_service: MatrixService): + def test_create__from_numpy_array(self, matrix_service: MatrixService) -> None: """Creates a new matrix object with the specified data.""" # when a matrix is created (inserted) in the service data = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64) @@ -76,13 +79,13 @@ def test_create__from_numpy_array(self, matrix_service: MatrixService): now = datetime.datetime.utcnow() assert now - datetime.timedelta(seconds=1) <= obj.created_at <= now - def test_create__side_effect(self, matrix_service: MatrixService): + def 
test_create__side_effect(self, matrix_service: MatrixService) -> None: """Creates a new matrix object with the specified data, but fail during saving.""" # if the matrix can't be created in the service matrix_repo = matrix_service.repo matrix_repo.save = Mock(side_effect=Exception("database error")) with pytest.raises(Exception, match="database error"): - data = [[1, 2, 3], [4, 5, 6]] + data: MatrixType = [[1, 2, 3], [4, 5, 6]] matrix_service.create(data) # the associated matrix file must not be deleted @@ -94,10 +97,10 @@ def test_create__side_effect(self, matrix_service: MatrixService): with db(): assert not db.session.query(Matrix).count() - def test_get(self, matrix_service): + def test_get(self, matrix_service: MatrixService) -> None: """Get a matrix object from the database and the matrix content repository.""" # when a matrix is created (inserted) in the service - data = [[1, 2, 3], [4, 5, 6]] + data: MatrixType = [[1, 2, 3], [4, 5, 6]] matrix_id = matrix_service.create(data) # nominal_case: we can retrieve the matrix and its content @@ -120,10 +123,10 @@ def test_get(self, matrix_service): obj = matrix_service.get(missing_hash) assert obj is None - def test_exists(self, matrix_service): + def test_exists(self, matrix_service: MatrixService) -> None: """Test the exists method.""" # when a matrix is created (inserted) in the service - data = [[1, 2, 3], [4, 5, 6]] + data: MatrixType = [[1, 2, 3], [4, 5, 6]] matrix_id = matrix_service.create(data) # nominal_case: we can retrieve the matrix and its content @@ -132,10 +135,10 @@ def test_exists(self, matrix_service): missing_hash = "8b1a9953c4611296a827abf8c47804d7e6c49c6b" assert not matrix_service.exists(missing_hash) - def test_delete__nominal_case(self, matrix_service: MatrixService): + def test_delete__nominal_case(self, matrix_service: MatrixService) -> None: """Delete a matrix object from the matrix content repository and the database.""" # when a matrix is created (inserted) in the service - data = [[1, 
2, 3], [4, 5, 6]] + data: MatrixType = [[1, 2, 3], [4, 5, 6]] matrix_id = matrix_service.create(data) # When the matrix id deleted @@ -151,7 +154,7 @@ def test_delete__nominal_case(self, matrix_service: MatrixService): with db(): assert not db.session.query(Matrix).count() - def test_delete__missing(self, matrix_service: MatrixService): + def test_delete__missing(self, matrix_service: MatrixService) -> None: """Delete a matrix object from the matrix content repository and the database.""" # When the matrix id deleted with db(): @@ -167,8 +170,139 @@ def test_delete__missing(self, matrix_service: MatrixService): with db(): assert not db.session.query(Matrix).count() + @pytest.mark.parametrize( + "data", + [ + pytest.param([[1, 2, 3], [4, 5, 6]], id="classic-array"), + pytest.param([[]], id="2D-empty-array"), + ], + ) + @pytest.mark.parametrize("content_type", ["application/json", "text/plain"]) + def test_create_by_importation__nominal_case( + self, + matrix_service: MatrixService, + data: MatrixType, + content_type: str, + ) -> None: + """ + Create a new matrix by importing a file. + The file is either a JSON file or a TSV file. 
+ """ + # Prepare the matrix data to import + matrix = np.array(data, dtype=np.float64) + if content_type == "application/json": + # JSON format of the array using the dataframe format + index = list(range(matrix.shape[0])) + columns = list(range(matrix.shape[1])) + content = json.dumps({"index": index, "columns": columns, "data": matrix.tolist()}) + buffer = io.BytesIO(content.encode("utf-8")) + filename = "matrix.json" + json_format = True + else: + # TSV format of the array (without header) + buffer = io.BytesIO() + np.savetxt(buffer, matrix, delimiter="\t") + buffer.seek(0) + filename = "matrix.txt" + json_format = False + + # Prepare a UploadFile object using the buffer + upload_file = _create_upload_file(filename=filename, file=buffer, content_type=content_type) + + # when a matrix is created (inserted) in the service + info_list: t.Sequence[MatrixInfoDTO] = matrix_service.create_by_importation(upload_file, is_json=json_format) + + # Then, check the list of created matrices + assert len(info_list) == 1 + info = info_list[0] -def test_dataset_lifecycle(): + # A "real" hash value is calculated + assert info.id, "ID can't be empty" + + # The matrix is saved in the content repository as a TSV file + bucket_dir = matrix_service.matrix_content_repository.bucket_dir + content_path = bucket_dir.joinpath(f"{info.id}.tsv") + actual = np.loadtxt(content_path) + assert actual.size == matrix.size and np.array_equal(actual.reshape(matrix.shape), matrix) + + # A matrix object is stored in the database + with db(): + obj = matrix_service.repo.get(info.id) + assert obj is not None, f"Missing Matrix object {info.id}" + assert obj.width == matrix.shape[1] + assert obj.height == matrix.shape[0] + now = datetime.datetime.utcnow() + assert now - datetime.timedelta(seconds=1) <= obj.created_at <= now + + @pytest.mark.parametrize("content_type", ["application/json", "text/plain"]) + def test_create_by_importation__zip_file(self, matrix_service: MatrixService, content_type: str) -> None: + """ + Create a ZIP file with several matrices, 
using either a JSON format or a TSV format. + All matrices of the ZIP file use the same format. + Check that the matrices are correctly imported. + """ + # Prepare the matrix data to import + data_list: t.List[MatrixType] = [ + [[1, 2, 3], [4, 5, 6]], + [[7, 8, 9, 10, 11], [17, 18, 19, 20, 21], [27, 28, 29, 30, 31]], + [[]], + ] + matrix_list: t.List[np.ndarray] = [np.array(data, dtype=np.float64) for data in data_list] + if content_type == "application/json": + # JSON format of the array using the dataframe format + index_list = [list(range(matrix.shape[0])) for matrix in matrix_list] + columns_list = [list(range(matrix.shape[1])) for matrix in matrix_list] + data_list = [matrix.tolist() for matrix in matrix_list] + content_list = [ + json.dumps({"index": index, "columns": columns, "data": data}).encode("utf-8") + for index, columns, data in zip(index_list, columns_list, data_list) + ] + json_format = True + else: + # TSV format of the array (without header) + content_list = [] + for matrix in matrix_list: + buffer = io.BytesIO() + np.savetxt(buffer, matrix, delimiter="\t") + content_list.append(buffer.getvalue()) + json_format = False + + buffer = io.BytesIO() + with zipfile.ZipFile(buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf: + for i, content in enumerate(content_list): + suffix = {True: "json", False: "txt"}[json_format] + zf.writestr(f"matrix-{i:1d}.{suffix}", content) + buffer.seek(0) + + # Prepare a UploadFile object using the buffer + upload_file = _create_upload_file(filename="matrices.zip", file=buffer, content_type="application/zip") + + # When matrices are created (inserted) in the service + info_list: t.Sequence[MatrixInfoDTO] = matrix_service.create_by_importation(upload_file, is_json=json_format) + + # Then, check the list of created matrices + assert len(info_list) == len(data_list) + for info, matrix in zip(info_list, matrix_list): + # A "real" hash value is calculated + assert info.id, "ID can't be empty" + + # The matrix is saved in 
the content repository as a TSV file + bucket_dir = matrix_service.matrix_content_repository.bucket_dir + content_path = bucket_dir.joinpath(f"{info.id}.tsv") + actual = np.loadtxt(content_path) + assert actual.size == matrix.size and np.array_equal(actual.reshape(matrix.shape), matrix) + + # A matrix object is stored in the database + with db(): + obj = matrix_service.repo.get(info.id) + assert obj is not None, f"Missing Matrix object {info.id}" + assert obj.width == (matrix.shape[1] if matrix.size else 0) + assert obj.height == matrix.shape[0] + now = datetime.datetime.utcnow() + assert now - datetime.timedelta(seconds=1) <= obj.created_at <= now + + +def test_dataset_lifecycle() -> None: content = Mock() repo = Mock() dataset_repo = Mock() @@ -347,7 +481,7 @@ def test_dataset_lifecycle(): dataset_repo.delete.assert_called_once() -def _create_upload_file(filename: str, file: t.IO = None, content_type: str = "") -> UploadFile: +def _create_upload_file(filename: str, file: io.BytesIO, content_type: str = "") -> UploadFile: if hasattr(UploadFile, "content_type"): # `content_type` attribute was replace by a read-ony property in starlette-v0.24. 
headers = Headers(headers={"content-type": content_type}) @@ -356,54 +490,3 @@ def _create_upload_file(filename: str, file: t.IO = None, content_type: str = "" else: # noinspection PyTypeChecker,PyArgumentList return UploadFile(filename=filename, file=file, content_type=content_type) - - -def test_import(): - # Init Mock - repo_content = Mock() - repo = Mock() - - file_str = "1\t2\t3\t4\t5\n6\t7\t8\t9\t10" - matrix_content = str.encode(file_str) - - # Expected - matrix_id = "123" - exp_matrix_info = [MatrixInfoDTO(id=matrix_id, name="matrix.txt")] - exp_matrix = Matrix(id=matrix_id, width=5, height=2) - # Test - service = MatrixService( - repo=repo, - repo_dataset=Mock(), - matrix_content_repository=repo_content, - file_transfer_manager=Mock(), - task_service=Mock(), - config=Mock(), - user_service=Mock(), - ) - service.repo.get.return_value = None - service.matrix_content_repository.save.return_value = matrix_id - service.repo.save.return_value = exp_matrix - - # CSV importation - matrix_file = _create_upload_file( - filename="matrix.txt", - file=io.BytesIO(matrix_content), - content_type="test/plain", - ) - matrix = service.create_by_importation(matrix_file) - assert matrix[0].name == exp_matrix_info[0].name - assert matrix[0].id is not None - - # Zip importation - zip_content = io.BytesIO() - with ZipFile(zip_content, "w", ZIP_DEFLATED) as output_data: - output_data.writestr("matrix.txt", file_str) - - zip_content.seek(0) - zip_file = _create_upload_file( - filename="Matrix.zip", - file=zip_content, - content_type="application/zip", - ) - matrix = service.create_by_importation(zip_file) - assert matrix == exp_matrix_info diff --git a/tests/storage/repository/filesystem/matrix/output_series_matrix_test.py b/tests/storage/repository/filesystem/matrix/output_series_matrix_test.py index e93b0006f8..d739e73b0d 100644 --- a/tests/storage/repository/filesystem/matrix/output_series_matrix_test.py +++ 
b/tests/storage/repository/filesystem/matrix/output_series_matrix_test.py @@ -9,7 +9,7 @@ from antarest.study.storage.rawstudy.model.filesystem.matrix.output_series_matrix import OutputSeriesMatrix MATRIX_DAILY_DATA = """\ -DE area va hourly +DE\tarea\tva\thourly \tVARIABLES\tBEGIN\tEND \t2\t1\t2 @@ -21,7 +21,7 @@ """ -def test_get(tmp_path: Path): +def test_get(tmp_path: Path) -> None: file = tmp_path / "matrix-daily.txt" file.write_text("\n\n\n\nmock\tfile\ndummy\tdummy\ndummy\tdummy\ndummy\tdummy") config = FileStudyTreeConfig(study_path=file, path=file, study_id="id", version=-1) @@ -55,7 +55,7 @@ def test_get(tmp_path: Path): assert node.load() == matrix.to_dict(orient="split") -def test_save(tmp_path: Path): +def test_save(tmp_path: Path) -> None: file = tmp_path / "matrix-daily.txt" config = FileStudyTreeConfig(study_path=file, path=file, study_id="id", version=-1)