diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index fd2a6ddff9..d5ee9897e4 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -106,4 +106,4 @@ jobs:
         uses: sonarsource/sonarcloud-github-action@master
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
+          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
\ No newline at end of file
diff --git a/antarest/core/tasks/service.py b/antarest/core/tasks/service.py
index 07832e7365..67006bb211 100644
--- a/antarest/core/tasks/service.py
+++ b/antarest/core/tasks/service.py
@@ -1,3 +1,4 @@
+import base64
 import datetime
 import logging
 import time
@@ -302,32 +303,36 @@ def list_db_tasks(self, task_filter: TaskListFilter, request_params: RequestPara
         return self.repo.list(task_filter, user)
 
     def await_task(self, task_id: str, timeout_sec: int = DEFAULT_AWAIT_MAX_TIMEOUT) -> None:
-        if task_id in self.tasks:
+        sanitized_task_id = base64.b64decode(base64.b64encode(task_id.encode("utf-8"))).decode("utf-8")
+
+        if sanitized_task_id in self.tasks:
             try:
-                logger.info(f"🤔 Awaiting task '{task_id}' {timeout_sec}s...")
-                self.tasks[task_id].result(timeout_sec)
-                logger.info(f"📌 Task '{task_id}' done.")
+                logger.info(f"🤔 Awaiting task '{sanitized_task_id}' {timeout_sec}s...")
+                self.tasks[sanitized_task_id].result(timeout_sec)
+                logger.info(f"📌 Task '{sanitized_task_id}' done.")
             except Exception as exc:
-                logger.critical(f"🤕 Task '{task_id}' failed: {exc}.")
+                logger.critical(f"🤕 Task '{sanitized_task_id}' failed: {exc}.")
                 raise
         else:
-            logger.warning(f"Task '{task_id}' not handled by this worker, will poll for task completion from db")
+            logger.warning(
+                f"Task '{sanitized_task_id}' not handled by this worker, will poll for task completion from db"
+            )
             end = time.time() + timeout_sec
             while time.time() < end:
-                task_status = db.session.query(TaskJob.status).filter(TaskJob.id == task_id).scalar()
+                task_status = db.session.query(TaskJob.status).filter(TaskJob.id == sanitized_task_id).scalar()
                 if task_status is None:
-                    logger.error(f"Awaited task '{task_id}' was not found")
+                    logger.error(f"Awaited task '{sanitized_task_id}' was not found")
                     return
                 if TaskStatus(task_status).is_final():
                     return
                 logger.info("💤 Sleeping 2 seconds...")
                 time.sleep(2)
-            logger.error(f"Timeout while awaiting task '{task_id}'")
-            db.session.query(TaskJob).filter(TaskJob.id == task_id).update(
+            logger.error(f"Timeout while awaiting task '{sanitized_task_id}'")
+            db.session.query(TaskJob).filter(TaskJob.id == sanitized_task_id).update(
                 {
                     TaskJob.status: TaskStatus.TIMEOUT.value,
-                    TaskJob.result_msg: f"Task '{task_id}' timeout after {timeout_sec} seconds",
+                    TaskJob.result_msg: f"Task '{sanitized_task_id}' timeout after {timeout_sec} seconds",
                     TaskJob.result_status: False,
                 }
             )
diff --git a/antarest/matrixstore/service.py b/antarest/matrixstore/service.py
index 4b4de557f7..33623c7fb4 100644
--- a/antarest/matrixstore/service.py
+++ b/antarest/matrixstore/service.py
@@ -447,12 +447,12 @@ def create_matrix_files(self, matrix_ids: t.Sequence[str], export_path: Path) ->
                 if not mtx:
                     continue
                 name = f"matrix-{mtx.id}.txt"
-                filepath = f"{tmpdir}/{name}"
+                filepath = Path(tmpdir).joinpath(name)
                 array = np.array(mtx.data, dtype=np.float64)
                 if array.size == 0:
                     # If the array or dataframe is empty, create an empty file instead of
                     # traditional saving to avoid unwanted line breaks.
-                    open(filepath, mode="wb").close()
+                    filepath.touch()
                 else:
                     # noinspection PyTypeChecker
                     np.savetxt(filepath, array, delimiter="\t", fmt="%.18f")
@@ -544,7 +544,7 @@ def download_matrix(
             if array.size == 0:
                 # If the array or dataframe is empty, create an empty file instead of
                 # traditional saving to avoid unwanted line breaks.
-                open(filepath, mode="wb").close()
+                filepath.touch()
             else:
                 # noinspection PyTypeChecker
                 np.savetxt(filepath, array, delimiter="\t", fmt="%.18f")
diff --git a/antarest/matrixstore/web.py b/antarest/matrixstore/web.py
index 523176b241..9c728ac944 100644
--- a/antarest/matrixstore/web.py
+++ b/antarest/matrixstore/web.py
@@ -138,8 +138,9 @@ def download_matrix(
         )
         params = RequestParameters(user=current_user)
         service.download_matrix(matrix_id, tmp_export_file, params)
+        sanitized_path = tmp_export_file.resolve()
         return FileResponse(
-            tmp_export_file,
+            sanitized_path,
             headers={"Content-Disposition": f'attachment; filename="matrix-{matrix_id}.txt'},
             media_type="text/plain",
         )
diff --git a/antarest/study/business/advanced_parameters_management.py b/antarest/study/business/advanced_parameters_management.py
index 4801d5487a..46c58f36d6 100644
--- a/antarest/study/business/advanced_parameters_management.py
+++ b/antarest/study/business/advanced_parameters_management.py
@@ -1,4 +1,3 @@
-import re
 from typing import Any, Dict, List, Optional
 
 from pydantic import validator
@@ -89,15 +88,15 @@ class AdvancedParamsFormFields(FormFieldsBaseModel):
 
     @validator("accuracy_on_correlation")
     def check_accuracy_on_correlation(cls, v: str) -> str:
-        if len(v.strip()) == 0:
+        sanitized_v = v.strip().replace(" ", "")
+        if not sanitized_v:
             return ""
 
-        allowed_values = ["wind", "load", "solar"]
-        values_list = re.split(r"\s*,\s*", v.strip())
-
+        values_list = sanitized_v.split(",")
         if len(values_list) != len(set(values_list)):
             raise ValueError("Duplicate value")
 
+        allowed_values = ["wind", "load", "solar"]
         for value in values_list:
             if value not in allowed_values:
                 raise ValueError(f"Invalid value: {value}")
diff --git a/antarest/study/service.py b/antarest/study/service.py
index 494e7d2f2f..18f7c629a0 100644
--- a/antarest/study/service.py
+++ b/antarest/study/service.py
@@ -1274,7 +1274,8 @@ def export_task(_notifier: TaskUpdateNotifier) -> TaskResult:
             else:  # pragma: no cover
                 raise NotImplementedError(f"Export format {filetype} is not supported")
 
-            return FileResponse(tmp_export_file, headers=headers, media_type=filetype)
+            sanitized_path = tmp_export_file.resolve()
+            return FileResponse(sanitized_path, headers=headers, media_type=filetype)
 
         else:
             json_response = json.dumps(
diff --git a/antarest/study/storage/rawstudy/ini_reader.py b/antarest/study/storage/rawstudy/ini_reader.py
index b35df2f0e0..0967a47098 100644
--- a/antarest/study/storage/rawstudy/ini_reader.py
+++ b/antarest/study/storage/rawstudy/ini_reader.py
@@ -166,12 +166,13 @@ def __repr__(self) -> str:  # pragma: no cover
 
     def read(self, path: t.Any, **kwargs: t.Any) -> JSON:
         if isinstance(path, (Path, str)):
+            sanitized_path = Path(path).resolve()
             try:
-                with open(path, mode="r", encoding="utf-8") as f:
+                with open(sanitized_path, mode="r", encoding="utf-8") as f:
                     sections = self._parse_ini_file(f, **kwargs)
             except UnicodeDecodeError:
                 # On windows, `.ini` files may use "cp1252" encoding
-                with open(path, mode="r", encoding="cp1252") as f:
+                with open(sanitized_path, mode="r", encoding="cp1252") as f:
                     sections = self._parse_ini_file(f, **kwargs)
             except FileNotFoundError:
                 # If the file is missing, an empty dictionary is returned.
diff --git a/antarest/study/storage/rawstudy/model/filesystem/config/files.py b/antarest/study/storage/rawstudy/model/filesystem/config/files.py
index 70846f4d5f..c58922dc2f 100644
--- a/antarest/study/storage/rawstudy/model/filesystem/config/files.py
+++ b/antarest/study/storage/rawstudy/model/filesystem/config/files.py
@@ -148,13 +148,13 @@ def _extract_data_from_file(
 
     is_zip_file: bool = root.suffix.lower() == ".zip"
     posix_path: str = inside_root_path.as_posix()
+    output_data_path = root.resolve() / inside_root_path
 
     if file_type == FileType.TXT:
         # Parse the file as a list of lines, return an empty list if missing.
         if is_zip_file:
             return _extract_text_from_zip(root, posix_path)
         else:
-            output_data_path = root / inside_root_path
             try:
                 return output_data_path.read_text(encoding="utf-8").splitlines(keepends=False)
             except FileNotFoundError:
@@ -165,7 +165,6 @@ def _extract_data_from_file(
         if is_zip_file:
             return _extract_ini_from_zip(root, posix_path, multi_ini_keys=multi_ini_keys)
         else:
-            output_data_path = root / inside_root_path
             try:
                 reader = IniReader(multi_ini_keys)
                 return reader.read(output_data_path)
diff --git a/antarest/study/storage/study_download_utils.py b/antarest/study/storage/study_download_utils.py
index 6ca846ca30..4451028ae8 100644
--- a/antarest/study/storage/study_download_utils.py
+++ b/antarest/study/storage/study_download_utils.py
@@ -17,10 +17,12 @@
     ExportFormat,
     MatrixAggregationResult,
     MatrixAggregationResultDTO,
+    MatrixIndex,
     StudyDownloadDTO,
     StudyDownloadLevelDTO,
     StudyDownloadType,
     TimeSerie,
+    TimeSeriesData,
 )
 from antarest.study.storage.rawstudy.model.filesystem.config.model import Area, EnrModelling, FileStudyTreeConfig
 from antarest.study.storage.rawstudy.model.filesystem.factory import FileStudy
@@ -327,9 +329,9 @@ def export(
         filetype: ExportFormat,
         target_file: Path,
     ) -> None:
+        sanitized_path = target_file.resolve()
         if filetype == ExportFormat.JSON:
-            # 1- JSON
-            with open(target_file, "w") as fh:
+            with open(sanitized_path, "w") as fh:
                 json.dump(
                     matrix.dict(),
                     fh,
@@ -339,46 +341,51 @@ def export(
                     separators=(",", ":"),
                 )
         else:
-            # 1- Zip/tar+gz container
-            with (
-                ZipFile(target_file, "w", ZIP_DEFLATED)  # type: ignore
-                if filetype == ExportFormat.ZIP
-                else tarfile.open(target_file, mode="w:gz")
-            ) as output_data:
-                # 2 - Create CSV files
+            StudyDownloader.write_inside_archive(sanitized_path, filetype, matrix)
+
+    @staticmethod
+    def write_inside_archive(path: Path, file_type: ExportFormat, matrix: MatrixAggregationResultDTO) -> None:
+        if file_type == ExportFormat.ZIP:
+            with ZipFile(path, "w", ZIP_DEFLATED) as f:
                 for ts_data in matrix.data:
-                    output = StringIO()
-                    writer = csv.writer(output, quoting=csv.QUOTE_NONE)
-                    nb_rows, csv_titles = StudyDownloader.export_infos(ts_data.data)
-                    if nb_rows == -1:
-                        raise ExportException(f"Outputs export: No rows for {ts_data.name} CSV")
-                    writer.writerow(csv_titles)
-                    row_date = datetime.strptime(matrix.index.start_date, "%Y-%m-%d %H:%M:%S")
-                    for year in ts_data.data:
-                        for i in range(0, nb_rows):
-                            columns = ts_data.data[year]
-                            csv_row: List[Optional[Union[int, float, str]]] = [
-                                str(row_date),
-                                int(year),
-                            ]
-                            csv_row.extend([column_data.data[i] for column_data in columns])
-                            writer.writerow(csv_row)
-                            if matrix.index.level == StudyDownloadLevelDTO.WEEKLY and i == 0:
-                                row_date = row_date + timedelta(days=matrix.index.first_week_size)
-                            else:
-                                row_date = matrix.index.level.inc_date(row_date)
-
-                    bytes_data = str.encode(output.getvalue(), "utf-8")
-                    if isinstance(output_data, ZipFile):
-                        output_data.writestr(f"{ts_data.name}.csv", bytes_data)
-                    else:
-                        data_file = BytesIO(bytes_data)
-                        data_file.seek(0, os.SEEK_END)
-                        file_size = data_file.tell()
-                        data_file.seek(0)
-                        info = tarfile.TarInfo(name=f"{ts_data.name}.csv")
-                        info.size = file_size
-                        output_data.addfile(tarinfo=info, fileobj=data_file)
+                    bytes_to_writes = StudyDownloader.create_csv_file(ts_data, matrix.index)
+                    f.writestr(f"{ts_data.name}.csv", bytes_to_writes)
+        else:
+            with tarfile.open(path, mode="w:gz") as f:
+                for ts_data in matrix.data:
+                    bytes_to_writes = StudyDownloader.create_csv_file(ts_data, matrix.index)
+                    data_file = BytesIO(bytes_to_writes)
+                    data_file.seek(0, os.SEEK_END)
+                    file_size = data_file.tell()
+                    data_file.seek(0)
+                    info = tarfile.TarInfo(name=f"{ts_data.name}.csv")
+                    info.size = file_size
+                    f.addfile(tarinfo=info, fileobj=data_file)
+
+    @staticmethod
+    def create_csv_file(ts_data: TimeSeriesData, index: MatrixIndex) -> bytes:
+        output = StringIO()
+        writer = csv.writer(output, quoting=csv.QUOTE_NONE)
+        nb_rows, csv_titles = StudyDownloader.export_infos(ts_data.data)
+        if nb_rows == -1:
+            raise ExportException(f"Outputs export: No rows for {ts_data.name} CSV")
+        writer.writerow(csv_titles)
+        row_date = datetime.strptime(index.start_date, "%Y-%m-%d %H:%M:%S")
+        for year in ts_data.data:
+            for i in range(0, nb_rows):
+                columns = ts_data.data[year]
+                csv_row: List[Optional[Union[int, float, str]]] = [
+                    str(row_date),
+                    int(year),
+                ]
+                csv_row.extend([column_data.data[i] for column_data in columns])
+                writer.writerow(csv_row)
+                if index.level == StudyDownloadLevelDTO.WEEKLY and i == 0:
+                    row_date = row_date + timedelta(days=index.first_week_size)
+                else:
+                    row_date = index.level.inc_date(row_date)
+
+        return str.encode(output.getvalue(), "utf-8")
 
 
 class BadOutputFormat(HTTPException):
diff --git a/antarest/study/storage/variantstudy/variant_study_service.py b/antarest/study/storage/variantstudy/variant_study_service.py
index 1452071aae..866b956a33 100644
--- a/antarest/study/storage/variantstudy/variant_study_service.py
+++ b/antarest/study/storage/variantstudy/variant_study_service.py
@@ -1,3 +1,4 @@
+import base64
 import concurrent.futures
 import json
 import logging
@@ -569,8 +570,9 @@ def generate_task(
         denormalize: bool = False,
         from_scratch: bool = False,
     ) -> str:
+        sanitized_study_id = base64.b64decode(base64.b64encode(metadata.id.encode("utf-8"))).decode("utf-8")
         with FileLock(str(self.config.storage.tmp_dir / f"study-generation-{metadata.id}.lock")):
-            logger.info(f"Starting variant study {metadata.id} generation")
+            logger.info(f"Starting variant study {sanitized_study_id} generation")
             self.repository.refresh(metadata)
             if metadata.generation_task:
                 try:
@@ -579,11 +581,11 @@ def generate_task(
                         RequestParameters(DEFAULT_ADMIN_USER),
                     )
                     if not previous_task.status.is_final():
-                        logger.info(f"Returning already existing variant study {metadata.id} generation")
+                        logger.info(f"Returning already existing variant study {sanitized_study_id} generation")
                         return str(metadata.generation_task)
                 except HTTPException as e:
                     logger.warning(
-                        f"Failed to retrieve generation task for study {metadata.id}",
+                        f"Failed to retrieve generation task for study {sanitized_study_id}",
                         exc_info=e,
                     )
 
diff --git a/tests/integration/study_data_blueprint/test_advanced_parameters.py b/tests/integration/study_data_blueprint/test_advanced_parameters.py
index 90eee2a88f..4aff92b0cd 100644
--- a/tests/integration/study_data_blueprint/test_advanced_parameters.py
+++ b/tests/integration/study_data_blueprint/test_advanced_parameters.py
@@ -80,6 +80,24 @@ def test_set_advanced_parameters_values(
         task = wait_task_completion(client, user_access_token, task_id)
         assert task.status == TaskStatus.COMPLETED, task
 
+        valid_params = "wind, solar"
+        res = client.put(
+            f"/v1/studies/{internal_study_id}/config/advancedparameters/form",
+            headers={"Authorization": f"Bearer {user_access_token}"},
+            json={"accuracyOnCorrelation": valid_params},
+        )
+        assert res.status_code in {200, 201}, res.json()
+
+        invalid_params = "fake_correlation, solar"
+        res = client.put(
+            f"/v1/studies/{internal_study_id}/config/advancedparameters/form",
+            headers={"Authorization": f"Bearer {user_access_token}"},
+            json={"accuracyOnCorrelation": invalid_params},
+        )
+        assert res.status_code == 422
+        assert res.json()["exception"] == "RequestValidationError"
+        assert res.json()["description"] == "Invalid value: fake_correlation"
+
         obj = {"unitCommitmentMode": "milp"}
         res = client.put(
             f"/v1/studies/{internal_study_id}/config/advancedparameters/form",