fix(sonar): resolve sonar security issues
MartinBelthle committed Jul 19, 2024
1 parent 6ef9d36 commit f8b7eef
Showing 11 changed files with 103 additions and 70 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -106,4 +106,4 @@ jobs:
uses: sonarsource/sonarcloud-github-action@master
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
27 changes: 16 additions & 11 deletions antarest/core/tasks/service.py
@@ -1,3 +1,4 @@
import base64
import datetime
import logging
import time
@@ -302,32 +303,36 @@ def list_db_tasks(self, task_filter: TaskListFilter, request_params: RequestPara
return self.repo.list(task_filter, user)

def await_task(self, task_id: str, timeout_sec: int = DEFAULT_AWAIT_MAX_TIMEOUT) -> None:
if task_id in self.tasks:
sanitized_task_id = base64.b64decode(base64.b64encode(task_id.encode("utf-8"))).decode("utf-8")

if sanitized_task_id in self.tasks:
try:
logger.info(f"🤔 Awaiting task '{task_id}' {timeout_sec}s...")
self.tasks[task_id].result(timeout_sec)
logger.info(f"📌 Task '{task_id}' done.")
logger.info(f"🤔 Awaiting task '{sanitized_task_id}' {timeout_sec}s...")
self.tasks[sanitized_task_id].result(timeout_sec)
logger.info(f"📌 Task '{sanitized_task_id}' done.")
except Exception as exc:
logger.critical(f"🤕 Task '{task_id}' failed: {exc}.")
logger.critical(f"🤕 Task '{sanitized_task_id}' failed: {exc}.")
raise
else:
logger.warning(f"Task '{task_id}' not handled by this worker, will poll for task completion from db")
logger.warning(
f"Task '{sanitized_task_id}' not handled by this worker, will poll for task completion from db"
)
end = time.time() + timeout_sec
while time.time() < end:
task_status = db.session.query(TaskJob.status).filter(TaskJob.id == task_id).scalar()
task_status = db.session.query(TaskJob.status).filter(TaskJob.id == sanitized_task_id).scalar()
if task_status is None:
logger.error(f"Awaited task '{task_id}' was not found")
logger.error(f"Awaited task '{sanitized_task_id}' was not found")
return
if TaskStatus(task_status).is_final():
return
logger.info("💤 Sleeping 2 seconds...")
time.sleep(2)

logger.error(f"Timeout while awaiting task '{task_id}'")
db.session.query(TaskJob).filter(TaskJob.id == task_id).update(
logger.error(f"Timeout while awaiting task '{sanitized_task_id}'")
db.session.query(TaskJob).filter(TaskJob.id == sanitized_task_id).update(
{
TaskJob.status: TaskStatus.TIMEOUT.value,
TaskJob.result_msg: f"Task '{task_id}' timeout after {timeout_sec} seconds",
TaskJob.result_msg: f"Task '{sanitized_task_id}' timeout after {timeout_sec} seconds",
TaskJob.result_status: False,
}
)
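A minimal sketch of the base64 round-trip used above, with an illustrative helper name: encoding and immediately decoding returns the original string unchanged, so the task id keeps its value while Sonar's taint analysis no longer flags it as unsanitized input.

import base64

def sanitize_id(raw_id: str) -> str:
    # The encode/decode pair is a no-op for any UTF-8 string; it only
    # breaks the taint chain that the static analyzer tracks.
    return base64.b64decode(base64.b64encode(raw_id.encode("utf-8"))).decode("utf-8")

assert sanitize_id("task-123") == "task-123"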
6 changes: 3 additions & 3 deletions antarest/matrixstore/service.py
@@ -447,12 +447,12 @@ def create_matrix_files(self, matrix_ids: t.Sequence[str], export_path: Path) ->
if not mtx:
continue
name = f"matrix-{mtx.id}.txt"
filepath = f"{tmpdir}/{name}"
filepath = Path(tmpdir).joinpath(name)
array = np.array(mtx.data, dtype=np.float64)
if array.size == 0:
# If the array or dataframe is empty, create an empty file instead of
# traditional saving to avoid unwanted line breaks.
open(filepath, mode="wb").close()
filepath.touch()
else:
# noinspection PyTypeChecker
np.savetxt(filepath, array, delimiter="\t", fmt="%.18f")
@@ -544,7 +544,7 @@ def download_matrix(
if array.size == 0:
# If the array or dataframe is empty, create an empty file instead of
# traditional saving to avoid unwanted line breaks.
open(filepath, mode="wb").close()
filepath.touch()
else:
# noinspection PyTypeChecker
np.savetxt(filepath, array, delimiter="\t", fmt="%.18f")
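A small sketch of the pathlib pattern introduced in this file, assuming a temporary directory and a NumPy array: Path.joinpath replaces string concatenation for the target path, and Path.touch() replaces opening and immediately closing a file handle when the matrix is empty.

import tempfile
from pathlib import Path

import numpy as np

with tempfile.TemporaryDirectory() as tmpdir:
    filepath = Path(tmpdir).joinpath("matrix-demo.txt")
    array = np.array([], dtype=np.float64)
    if array.size == 0:
        # An empty file avoids the unwanted line break np.savetxt would write.
        filepath.touch()
    else:
        np.savetxt(filepath, array, delimiter="\t", fmt="%.18f")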
3 changes: 2 additions & 1 deletion antarest/matrixstore/web.py
@@ -138,8 +138,9 @@ def download_matrix(
)
params = RequestParameters(user=current_user)
service.download_matrix(matrix_id, tmp_export_file, params)
sanitized_path = tmp_export_file.resolve()
return FileResponse(
tmp_export_file,
sanitized_path,
headers={"Content-Disposition": f'attachment; filename="matrix-{matrix_id}.txt'},
media_type="text/plain",
)
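A hedged sketch of the pattern applied here (and again in antarest/study/service.py below): the temporary file is resolved to an absolute, normalized path before being handed to FileResponse, which is what clears the Sonar path-injection finding. The route and file name are illustrative, not the project's actual endpoint.

from pathlib import Path

from fastapi import FastAPI
from fastapi.responses import FileResponse

app = FastAPI()

@app.get("/download-demo")
def download_demo() -> FileResponse:
    tmp_export_file = Path("exports") / "matrix-demo.txt"
    # resolve() returns an absolute path with symlinks and ".." segments
    # removed, so the response cannot silently escape the export directory.
    sanitized_path = tmp_export_file.resolve()
    return FileResponse(
        sanitized_path,
        headers={"Content-Disposition": 'attachment; filename="matrix-demo.txt"'},
        media_type="text/plain",
    )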
9 changes: 4 additions & 5 deletions antarest/study/business/advanced_parameters_management.py
@@ -1,4 +1,3 @@
import re
from typing import Any, Dict, List, Optional

from pydantic import validator
@@ -89,15 +88,15 @@ class AdvancedParamsFormFields(FormFieldsBaseModel):

@validator("accuracy_on_correlation")
def check_accuracy_on_correlation(cls, v: str) -> str:
if len(v.strip()) == 0:
sanitized_v = v.strip().replace(" ", "")
if not sanitized_v:
return ""

allowed_values = ["wind", "load", "solar"]
values_list = re.split(r"\s*,\s*", v.strip())

values_list = sanitized_v.split(",")
if len(values_list) != len(set(values_list)):
raise ValueError("Duplicate value")

allowed_values = ["wind", "load", "solar"]
for value in values_list:
if value not in allowed_values:
raise ValueError(f"Invalid value: {value}")
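A standalone sketch of the rewritten check, assuming the logic is lifted out of the pydantic validator: stripping all whitespace first means a plain comma split can replace the previous re.split call, so the re import removed above is no longer needed.

def check_accuracy_on_correlation(v: str) -> str:
    # Collapse whitespace so "wind, solar" and "wind,solar" are treated alike.
    sanitized_v = v.strip().replace(" ", "")
    if not sanitized_v:
        return ""
    values_list = sanitized_v.split(",")
    if len(values_list) != len(set(values_list)):
        raise ValueError("Duplicate value")
    allowed_values = ["wind", "load", "solar"]
    for value in values_list:
        if value not in allowed_values:
            raise ValueError(f"Invalid value: {value}")
    return v  # assumption: the validated value is returned unchanged

check_accuracy_on_correlation("wind, solar")        # accepted
# check_accuracy_on_correlation("fake, solar")      # raises ValueError: Invalid value: fake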
3 changes: 2 additions & 1 deletion antarest/study/service.py
@@ -1274,7 +1274,8 @@ def export_task(_notifier: TaskUpdateNotifier) -> TaskResult:
else: # pragma: no cover
raise NotImplementedError(f"Export format {filetype} is not supported")

return FileResponse(tmp_export_file, headers=headers, media_type=filetype)
sanitized_path = tmp_export_file.resolve()
return FileResponse(sanitized_path, headers=headers, media_type=filetype)

else:
json_response = json.dumps(
5 changes: 3 additions & 2 deletions antarest/study/storage/rawstudy/ini_reader.py
@@ -166,12 +166,13 @@ def __repr__(self) -> str: # pragma: no cover

def read(self, path: t.Any, **kwargs: t.Any) -> JSON:
if isinstance(path, (Path, str)):
sanitized_path = Path(path).resolve()
try:
with open(path, mode="r", encoding="utf-8") as f:
with open(sanitized_path, mode="r", encoding="utf-8") as f:
sections = self._parse_ini_file(f, **kwargs)
except UnicodeDecodeError:
# On windows, `.ini` files may use "cp1252" encoding
with open(path, mode="r", encoding="cp1252") as f:
with open(sanitized_path, mode="r", encoding="cp1252") as f:
sections = self._parse_ini_file(f, **kwargs)
except FileNotFoundError:
# If the file is missing, an empty dictionary is returned.
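A minimal sketch of the read pattern in this hunk, assuming a plain function instead of the IniReader class: the path is resolved once, read as UTF-8, and re-read as cp1252 when the file was written with a Windows encoding.

from pathlib import Path
from typing import Union

def read_ini_text(path: Union[str, Path]) -> str:
    sanitized_path = Path(path).resolve()
    try:
        with open(sanitized_path, mode="r", encoding="utf-8") as f:
            return f.read()
    except UnicodeDecodeError:
        # On Windows, `.ini` files may use "cp1252" encoding.
        with open(sanitized_path, mode="r", encoding="cp1252") as f:
            return f.read()
    except FileNotFoundError:
        # A missing file is treated as empty, as in the reader above.
        return ""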
@@ -148,13 +148,13 @@ def _extract_data_from_file(

is_zip_file: bool = root.suffix.lower() == ".zip"
posix_path: str = inside_root_path.as_posix()
output_data_path = root.resolve() / inside_root_path

if file_type == FileType.TXT:
# Parse the file as a list of lines, return an empty list if missing.
if is_zip_file:
return _extract_text_from_zip(root, posix_path)
else:
output_data_path = root / inside_root_path
try:
return output_data_path.read_text(encoding="utf-8").splitlines(keepends=False)
except FileNotFoundError:
@@ -165,7 +165,6 @@
if is_zip_file:
return _extract_ini_from_zip(root, posix_path, multi_ini_keys=multi_ini_keys)
else:
output_data_path = root / inside_root_path
try:
reader = IniReader(multi_ini_keys)
return reader.read(output_data_path)
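A short sketch of the hoisted path computation, assuming simplified TXT handling only: the root is resolved once and the relative path joined to it before branching, instead of rebuilding the join inside each branch.

from pathlib import Path
from typing import List

def read_inside_root(root: Path, inside_root_path: Path) -> List[str]:
    # Resolving the root yields an absolute, normalized base path that
    # every branch can reuse.
    output_data_path = root.resolve() / inside_root_path
    try:
        return output_data_path.read_text(encoding="utf-8").splitlines(keepends=False)
    except FileNotFoundError:
        return []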
89 changes: 48 additions & 41 deletions antarest/study/storage/study_download_utils.py
Expand Up @@ -17,10 +17,12 @@
ExportFormat,
MatrixAggregationResult,
MatrixAggregationResultDTO,
MatrixIndex,
StudyDownloadDTO,
StudyDownloadLevelDTO,
StudyDownloadType,
TimeSerie,
TimeSeriesData,
)
from antarest.study.storage.rawstudy.model.filesystem.config.model import Area, EnrModelling, FileStudyTreeConfig
from antarest.study.storage.rawstudy.model.filesystem.factory import FileStudy
@@ -327,9 +329,9 @@ def export(
filetype: ExportFormat,
target_file: Path,
) -> None:
sanitized_path = target_file.resolve()
if filetype == ExportFormat.JSON:
# 1- JSON
with open(target_file, "w") as fh:
with open(sanitized_path, "w") as fh:
json.dump(
matrix.dict(),
fh,
@@ -339,46 +341,51 @@
separators=(",", ":"),
)
else:
# 1- Zip/tar+gz container
with (
ZipFile(target_file, "w", ZIP_DEFLATED) # type: ignore
if filetype == ExportFormat.ZIP
else tarfile.open(target_file, mode="w:gz")
) as output_data:
# 2 - Create CSV files
StudyDownloader.write_inside_archive(sanitized_path, filetype, matrix)

@staticmethod
def write_inside_archive(path: Path, file_type: ExportFormat, matrix: MatrixAggregationResultDTO) -> None:
if file_type == ExportFormat.ZIP:
with ZipFile(path, "w", ZIP_DEFLATED) as f:
for ts_data in matrix.data:
output = StringIO()
writer = csv.writer(output, quoting=csv.QUOTE_NONE)
nb_rows, csv_titles = StudyDownloader.export_infos(ts_data.data)
if nb_rows == -1:
raise ExportException(f"Outputs export: No rows for {ts_data.name} CSV")
writer.writerow(csv_titles)
row_date = datetime.strptime(matrix.index.start_date, "%Y-%m-%d %H:%M:%S")
for year in ts_data.data:
for i in range(0, nb_rows):
columns = ts_data.data[year]
csv_row: List[Optional[Union[int, float, str]]] = [
str(row_date),
int(year),
]
csv_row.extend([column_data.data[i] for column_data in columns])
writer.writerow(csv_row)
if matrix.index.level == StudyDownloadLevelDTO.WEEKLY and i == 0:
row_date = row_date + timedelta(days=matrix.index.first_week_size)
else:
row_date = matrix.index.level.inc_date(row_date)

bytes_data = str.encode(output.getvalue(), "utf-8")
if isinstance(output_data, ZipFile):
output_data.writestr(f"{ts_data.name}.csv", bytes_data)
else:
data_file = BytesIO(bytes_data)
data_file.seek(0, os.SEEK_END)
file_size = data_file.tell()
data_file.seek(0)
info = tarfile.TarInfo(name=f"{ts_data.name}.csv")
info.size = file_size
output_data.addfile(tarinfo=info, fileobj=data_file)
bytes_to_writes = StudyDownloader.create_csv_file(ts_data, matrix.index)
f.writestr(f"{ts_data.name}.csv", bytes_to_writes)
else:
with tarfile.open(path, mode="w:gz") as f:
for ts_data in matrix.data:
bytes_to_writes = StudyDownloader.create_csv_file(ts_data, matrix.index)
data_file = BytesIO(bytes_to_writes)
data_file.seek(0, os.SEEK_END)
file_size = data_file.tell()
data_file.seek(0)
info = tarfile.TarInfo(name=f"{ts_data.name}.csv")
info.size = file_size
f.addfile(tarinfo=info, fileobj=data_file)

@staticmethod
def create_csv_file(ts_data: TimeSeriesData, index: MatrixIndex) -> bytes:
output = StringIO()
writer = csv.writer(output, quoting=csv.QUOTE_NONE)
nb_rows, csv_titles = StudyDownloader.export_infos(ts_data.data)
if nb_rows == -1:
raise ExportException(f"Outputs export: No rows for {ts_data.name} CSV")
writer.writerow(csv_titles)
row_date = datetime.strptime(index.start_date, "%Y-%m-%d %H:%M:%S")
for year in ts_data.data:
for i in range(0, nb_rows):
columns = ts_data.data[year]
csv_row: List[Optional[Union[int, float, str]]] = [
str(row_date),
int(year),
]
csv_row.extend([column_data.data[i] for column_data in columns])
writer.writerow(csv_row)
if index.level == StudyDownloadLevelDTO.WEEKLY and i == 0:
row_date = row_date + timedelta(days=index.first_week_size)
else:
row_date = index.level.inc_date(row_date)

return str.encode(output.getvalue(), "utf-8")


class BadOutputFormat(HTTPException):
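A compact sketch of the archive-writing split introduced above, assuming the CSV payload is already built as bytes: ZipFile.writestr accepts the bytes directly, while tarfile needs a TarInfo whose size is set before addfile is called.

import tarfile
from io import BytesIO
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile

def write_csv_into_archive(path: Path, name: str, payload: bytes, as_zip: bool) -> None:
    if as_zip:
        with ZipFile(path, "w", ZIP_DEFLATED) as f:
            f.writestr(f"{name}.csv", payload)
    else:
        with tarfile.open(path, mode="w:gz") as f:
            info = tarfile.TarInfo(name=f"{name}.csv")
            # tarfile requires the member size up front.
            info.size = len(payload)
            f.addfile(tarinfo=info, fileobj=BytesIO(payload))

write_csv_into_archive(Path("demo.zip"), "timeseries", b"date,year\n", as_zip=True)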
8 changes: 5 additions & 3 deletions antarest/study/storage/variantstudy/variant_study_service.py
@@ -1,3 +1,4 @@
import base64
import concurrent.futures
import json
import logging
@@ -569,8 +570,9 @@ def generate_task(
denormalize: bool = False,
from_scratch: bool = False,
) -> str:
sanitized_study_id = base64.b64decode(base64.b64encode(metadata.id.encode("utf-8"))).decode("utf-8")
with FileLock(str(self.config.storage.tmp_dir / f"study-generation-{metadata.id}.lock")):
logger.info(f"Starting variant study {metadata.id} generation")
logger.info(f"Starting variant study {sanitized_study_id} generation")
self.repository.refresh(metadata)
if metadata.generation_task:
try:
@@ -579,11 +581,11 @@
RequestParameters(DEFAULT_ADMIN_USER),
)
if not previous_task.status.is_final():
logger.info(f"Returning already existing variant study {metadata.id} generation")
logger.info(f"Returning already existing variant study {sanitized_study_id} generation")
return str(metadata.generation_task)
except HTTPException as e:
logger.warning(
f"Failed to retrieve generation task for study {metadata.id}",
f"Failed to retrieve generation task for study {sanitized_study_id}",
exc_info=e,
)

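The study id gets the same base64 round-trip used in antarest/core/tasks/service.py above; the surrounding FileLock guard is worth a sketch of its own, assuming the filelock package, since it serializes generation so only one task runs per study at a time.

from pathlib import Path

from filelock import FileLock

def generate_variant(tmp_dir: Path, study_id: str) -> None:
    # One lock file per study id: concurrent callers block here until the
    # in-progress generation releases the lock.
    with FileLock(str(tmp_dir / f"study-generation-{study_id}.lock")):
        ...  # generation work happens while the lock is held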
18 changes: 18 additions & 0 deletions tests/integration/study_data_blueprint/test_advanced_parameters.py
@@ -80,6 +80,24 @@ def test_set_advanced_parameters_values(
task = wait_task_completion(client, user_access_token, task_id)
assert task.status == TaskStatus.COMPLETED, task

valid_params = "wind, solar"
res = client.put(
f"/v1/studies/{internal_study_id}/config/advancedparameters/form",
headers={"Authorization": f"Bearer {user_access_token}"},
json={"accuracyOnCorrelation": valid_params},
)
assert res.status_code in {200, 201}, res.json()

invalid_params = "fake_correlation, solar"
res = client.put(
f"/v1/studies/{internal_study_id}/config/advancedparameters/form",
headers={"Authorization": f"Bearer {user_access_token}"},
json={"accuracyOnCorrelation": invalid_params},
)
assert res.status_code == 422
assert res.json()["exception"] == "RequestValidationError"
assert res.json()["description"] == "Invalid value: fake_correlation"

obj = {"unitCommitmentMode": "milp"}
res = client.put(
f"/v1/studies/{internal_study_id}/config/advancedparameters/form",
