From e4986343eac7833d7420b1b251a77cad13e4c344 Mon Sep 17 00:00:00 2001 From: Gautzilla <72027971+Gautzilla@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:31:35 +0100 Subject: [PATCH] remove umask calls (#234) * remove umask calls * add umask setting to job files --- src/OSmOSE/Spectrogram.py | 9 +++++---- src/OSmOSE/cluster/audio_reshaper.py | 18 ++++++++++++++++-- src/OSmOSE/cluster/compute_statistics.py | 14 +++++++++++--- src/OSmOSE/cluster/merge_timestamp_csv.py | 11 +++++++++-- src/OSmOSE/cluster/resample.py | 2 -- src/OSmOSE/job.py | 5 ++--- src/OSmOSE/utils/core_utils.py | 11 +++++++---- 7 files changed, 50 insertions(+), 20 deletions(-) diff --git a/src/OSmOSE/Spectrogram.py b/src/OSmOSE/Spectrogram.py index 3738db4c..8c84baa8 100644 --- a/src/OSmOSE/Spectrogram.py +++ b/src/OSmOSE/Spectrogram.py @@ -31,8 +31,8 @@ from OSmOSE.utils.core_utils import ( chmod_if_needed, get_timestamp_of_audio_file, + get_umask, safe_read, - set_umask, ) from OSmOSE.utils.path_utils import make_path @@ -479,7 +479,6 @@ def __build_path( dry: `bool`, optional If set to True, will not create the folders and just return the file path. """ - set_umask() processed_path = self.path / OSMOSE_PATH.spectrogram audio_foldername = f"{self.spectro_duration!s}_{self.dataset_sr!s}" self.audio_path = self.path / OSMOSE_PATH.raw_audio / audio_foldername @@ -803,6 +802,7 @@ def initialize( --batch-ind-min {i_min}\ --batch-ind-max {i_max}\ --concat {self.concat}\ + --umask {get_umask()}\ {'--verbose' if self.verbose else ''}", jobname=f"reshape_{batch}", preset="low", @@ -822,7 +822,8 @@ def initialize( if not self.__local: self.jb.build_job_file( script_path=Path(inspect.getfile(merge_timestamp_csv)).resolve(), - script_args=f"--input-files {self.audio_path}", + script_args=f"--input-files {self.audio_path}\ + --umask {get_umask()}", jobname="merge_timestamp", preset="low", mem="30G", @@ -899,6 +900,7 @@ def initialize( --hp-filter-min-freq {self.hp_filter_min_freq}\ --batch-ind-min {i_min}\ --batch-ind-max {i_max}\ + --umask {get_umask()}\ --output-file {self.path / OSMOSE_PATH.statistics / f'SummaryStats_{i_min}.csv'}", jobname="OSmOSE_get_zscore_params", preset="low", @@ -1121,7 +1123,6 @@ def process_file( self.save_for_LTAS = save_for_LTAS else: - set_umask() try: if clean_adjust_folder and ( ( diff --git a/src/OSmOSE/cluster/audio_reshaper.py b/src/OSmOSE/cluster/audio_reshaper.py index ef5bf40b..54cc7530 100644 --- a/src/OSmOSE/cluster/audio_reshaper.py +++ b/src/OSmOSE/cluster/audio_reshaper.py @@ -1,3 +1,4 @@ +import os from argparse import ArgumentParser from pathlib import Path @@ -8,7 +9,7 @@ from OSmOSE.config import DPDEFAULT, FPDEFAULT from OSmOSE.utils.audio_utils import get_all_audio_files -from OSmOSE.utils.core_utils import chmod_if_needed, set_umask +from OSmOSE.utils.core_utils import chmod_if_needed from OSmOSE.utils.path_utils import make_path from OSmOSE.utils.timestamp_utils import to_timestamp @@ -31,6 +32,7 @@ def reshape( verbose: bool = False, overwrite: bool = True, threshold: int = 5, + umask: int = 0o002, ): """Reshape all audio files in the folder to be of the specified duration and/or sampling rate. @@ -89,11 +91,16 @@ def reshape( threshold : int, optional Integer from 0 to 100 to filter out segments with a number of sample inferior to (threshold * spectrogram duration * new_sr) + + umask : int, optional + The umask to apply on the created files permissions. Default is 0o002. + """ - set_umask() segment_duration = pd.Timedelta(seconds=segment_size) msg_log = "" + os.umask(umask) + # validation for threshold if not (0 <= threshold <= 100): raise ValueError( @@ -458,6 +465,12 @@ def reshape( default=-1, help="Sampling rate", ) + parser.add_argument( + "--umask", + type=int, + default=0o002, + help="Umask to apply on the created files permissions.", + ) args = parser.parse_args() @@ -486,4 +499,5 @@ def reshape( verbose=args.verbose, overwrite=args.overwrite, threshold=args.threshold, + umask=args.umask, ) diff --git a/src/OSmOSE/cluster/compute_statistics.py b/src/OSmOSE/cluster/compute_statistics.py index c65dff78..e61a8fa6 100755 --- a/src/OSmOSE/cluster/compute_statistics.py +++ b/src/OSmOSE/cluster/compute_statistics.py @@ -1,5 +1,6 @@ import argparse import csv +import os import sys from pathlib import Path @@ -7,7 +8,7 @@ import soundfile as sf from scipy import signal -from OSmOSE.utils.core_utils import get_timestamp_of_audio_file, set_umask +from OSmOSE.utils.core_utils import get_timestamp_of_audio_file def Write_zscore_norma_params( @@ -17,6 +18,7 @@ def Write_zscore_norma_params( hp_filter_min_freq: int, batch_ind_min: int = 0, batch_ind_max: int = -1, + umask: int = 0o002, ): """Computes the normalization parameters for the Zscore normalisation of the dataset and writes it to a csv. @@ -42,8 +44,7 @@ def Write_zscore_norma_params( The last file of the list to be processed. Default is -1, meaning the entire list is processed. """ - set_umask() - + os.umask(umask) all_files = sorted(Path(input_dir).glob("*wav")) # If batch_ind_max is -1, we go to the end of the list. wav_list = all_files[ @@ -123,6 +124,12 @@ def Write_zscore_norma_params( default=-1, help="The last file to consider. -1 means consider all files from ind-min. Default is -1", ) + parser.add_argument( + "--umask", + type=int, + default=0o002, + help="Umask to apply on the created files permissions.", + ) args = parser.parse_args() @@ -132,4 +139,5 @@ def Write_zscore_norma_params( hp_filter_min_freq=args.hp_filter_min_freq, batch_ind_min=args.batch_ind_min, batch_ind_max=args.batch_ind_max, + umask=args.umask, ) diff --git a/src/OSmOSE/cluster/merge_timestamp_csv.py b/src/OSmOSE/cluster/merge_timestamp_csv.py index 204d1403..70286fc8 100644 --- a/src/OSmOSE/cluster/merge_timestamp_csv.py +++ b/src/OSmOSE/cluster/merge_timestamp_csv.py @@ -6,7 +6,8 @@ import pandas as pd -def merge_timestamp_csv(input_files: str): +def merge_timestamp_csv(input_files: str, umask: int): + os.umask(umask) input_dir_path = Path(input_files) list_conca_timestamps = [] @@ -36,6 +37,12 @@ def merge_timestamp_csv(input_files: str): "-i", help="The files to be reshaped, as either the path to a directory containing audio files and a timestamp.csv or a list of filenames all in the same directory alongside a timestamp.csv.", ) + parser.add_argument( + "--umask", + type=int, + default=0o002, + help="Umask to apply on the created files permissions.", + ) args = parser.parse_args() @@ -45,4 +52,4 @@ def merge_timestamp_csv(input_files: str): else args.input_files ) - files = merge_timestamp_csv(input_files=input_files) + files = merge_timestamp_csv(input_files=input_files, umask=args.umask) diff --git a/src/OSmOSE/cluster/resample.py b/src/OSmOSE/cluster/resample.py index cb658304..c044bcbe 100755 --- a/src/OSmOSE/cluster/resample.py +++ b/src/OSmOSE/cluster/resample.py @@ -4,7 +4,6 @@ from pathlib import Path from OSmOSE.utils.audio_utils import get_all_audio_files -from OSmOSE.utils.core_utils import set_umask def resample( @@ -31,7 +30,6 @@ def resample( The index of the last file of the batch. The default is -1, meaning the last file of the input directory. """ - set_umask() if platform.system() == "Windows": print("Sox is unavailable on Windows") return diff --git a/src/OSmOSE/job.py b/src/OSmOSE/job.py index deac12e7..5bc40f7a 100755 --- a/src/OSmOSE/job.py +++ b/src/OSmOSE/job.py @@ -15,7 +15,7 @@ import tomlkit from OSmOSE.config import DPDEFAULT, FPDEFAULT -from OSmOSE.utils.core_utils import chmod_if_needed, read_config, set_umask +from OSmOSE.utils.core_utils import chmod_if_needed, read_config JOB_CONFIG_TEMPLATE = namedtuple( "job_config", @@ -277,7 +277,6 @@ def build_job_file( The path to the created job file. """ - set_umask() if "Presets" in self.__config.keys(): if preset and preset.lower() not in self.__config["Presets"].keys(): raise ValueError( @@ -551,7 +550,7 @@ def list_jobs(self): delta = ( datetime.fromtimestamp( time.mktime( - time.localtime(job_info["outfile"].stat().st_ctime) + time.localtime(job_info["outfile"].stat().st_ctime), ), ) - created_at diff --git a/src/OSmOSE/utils/core_utils.py b/src/OSmOSE/utils/core_utils.py index acb16a1d..b641f54a 100644 --- a/src/OSmOSE/utils/core_utils.py +++ b/src/OSmOSE/utils/core_utils.py @@ -526,10 +526,6 @@ def check_n_files( return len(bad_files) -def set_umask(): - os.umask(0o002) - - def get_files(path, extensions): all_files = [] for ext in extensions: @@ -873,3 +869,10 @@ def change_owner_group(path: Path, owner_group: str) -> None: ) glc.logger.error(message) raise PermissionError(message) from e + + +def get_umask() -> int: + """Return the current umask.""" + umask = os.umask(0) + os.umask(umask) + return umask