-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactoring, added tests, working for old and new miseq
- Loading branch information
Tomáš Houfek
committed
Oct 3, 2024
1 parent
f380eda
commit 8fff7a4
Showing
1,217 changed files
with
430 additions
and
192 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import os | ||
import shutil | ||
import logging | ||
import sys | ||
from datetime import datetime | ||
from organiser.run_organisers.old_miseq_organise_run import OldMiseqRunOrganiser | ||
from organiser.run_organisers.new_miseq_organise_run import NewMiseqOrganiseRun | ||
from organiser.run_organisers.organise_run import OrganiseRun | ||
from organiser.helpers.file_helpers import create_dictionary_if_not_exist | ||
|
||
|
||
class Processor:
    """Organise every run folder found in the runs folder.

    For each run an organiser is chosen from the folder's contents and asked
    to organise it. The run folder is then moved under the folder for
    organised files: into ``backups`` on success, into ``errors`` when the
    organiser raised ``FileNotFoundError``.
    """

    # Entries of the runs folder that are never treated as runs.
    _RESERVED_NAMES = ("backups", "logs", "errors")

    def __init__(self, pseudnymized_runs_folder, folder_for_organised_files, patient_folder):
        """Remember the three working folders.

        Args:
            pseudnymized_runs_folder: Folder whose sub-folders are the runs
                to organise.
            folder_for_organised_files: Destination root; ``logs``,
                ``backups`` and ``errors`` are kept directly below it.
            patient_folder: Passed through unchanged to the organisers.
        """
        # NOTE: the attribute spelling is part of the existing interface and
        # is kept as-is so external users of the attribute keep working.
        self.psedunymized_runs_folder = pseudnymized_runs_folder
        self.organised_files_folder = folder_for_organised_files
        self.patient_folder = patient_folder

    def process_runs(self):
        """Organise all runs, logging to a timestamped file and to stdout."""
        # The log file lives in <organised>/logs, so that folder must exist
        # before logging is configured.
        self._create_important_folders_if_not_exist()
        log_file = os.path.join(self.organised_files_folder, "logs",
                                datetime.now().strftime('%d_%m_%Y-%H_%M.log'))
        logging.basicConfig(filename=log_file,
                            encoding='utf-8',
                            level=logging.INFO)
        logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

        for run in self._list_run_folders():
            logging.info(f"Organising: {run}")
            organiser = self._get_correct_organiser(run)
            self._try_organise_run(run, organiser)

        logging.info("Done!")

    def _list_run_folders(self):
        """Return the names of the run directories in the runs folder.

        Reserved names (``backups``, ``logs``, ``errors``) are ignored.
        Entries that are not directories are skipped with a warning: listing
        their contents later would raise ``NotADirectoryError`` and abort the
        whole batch.
        """
        run_names = []
        for entry in os.listdir(self.psedunymized_runs_folder):
            if entry in self._RESERVED_NAMES:
                continue
            if not os.path.isdir(os.path.join(self.psedunymized_runs_folder, entry)):
                logging.warning(f"Skipping {entry}: not a run folder")
                continue
            run_names.append(entry)
        return run_names

    def _try_organise_run(self, run, organiser) -> bool:
        """Run ``organiser`` and move the run folder according to the outcome.

        Args:
            run: Name of the run folder inside the runs folder.
            organiser: Object exposing ``organise_run()``.

        Returns:
            ``True`` when the run was organised and moved into ``backups``;
            ``False`` when ``FileNotFoundError`` was raised and the run was
            moved into ``errors``. Any other exception propagates.
        """
        run_source = os.path.join(self.psedunymized_runs_folder, run)
        try:
            organiser.organise_run()
        except FileNotFoundError as e:
            logging.error(f"Run {run} is missing some data\nError:\n{e}")
            shutil.move(run_source,
                        os.path.join(self.organised_files_folder, "errors", run))
            return False

        shutil.move(run_source,
                    os.path.join(self.organised_files_folder, "backups", run))
        logging.info(f"Run {run} moved into backups")
        return True

    def _create_important_folders_if_not_exist(self):
        """Ensure ``logs``, ``backups`` and ``errors`` exist in the output folder."""
        # NOTE(review): create_dictionary_if_not_exist is a project helper not
        # visible here; presumably it creates the directory when it is missing.
        for name in ("logs", "backups", "errors"):
            create_dictionary_if_not_exist(os.path.join(self.organised_files_folder, name))

    def _get_correct_organiser(self, run_path) -> "OrganiseRun":
        """Choose the organiser matching the layout of the run folder.

        A run containing an ``Alignment_1`` or ``SoftwareVersionsFile`` entry
        is handled as New Miseq; everything else as Old Miseq.

        Args:
            run_path: Name of the run folder inside the runs folder.
        """
        full_run_path = os.path.join(self.psedunymized_runs_folder, run_path)
        # List the folder once; both markers are checked against the same listing.
        run_contents = os.listdir(full_run_path)

        if "Alignment_1" in run_contents or "SoftwareVersionsFile" in run_contents:
            logging.info(f"{run_path} processed as New Miseq")
            return NewMiseqOrganiseRun(self.psedunymized_runs_folder, run_path,
                                       self.organised_files_folder, self.patient_folder)

        logging.info(f"{run_path} processed as Old Miseq")
        return OldMiseqRunOrganiser(self.psedunymized_runs_folder, run_path,
                                    self.organised_files_folder, self.patient_folder)
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import os | ||
from pathlib import Path | ||
|
||
from organiser.helpers.file_helpers import copy_folder_if_exists, copy_if_exists | ||
from .old_miseq_organise_run import OldMiseqRunOrganiser | ||
|
||
|
||
class NewMiseqOrganiseRun(OldMiseqRunOrganiser):
    """Organiser for runs whose data sits under an ``Alignment_1`` folder.

    Only the steps overridden here are visible; everything else (including
    ``self.pseudo_run``, ``self.file``, ``self.organised_runs`` and the
    helper methods called below) is inherited from ``OldMiseqRunOrganiser``.
    """

    def organise_run(self):
        """Organise the run under ``<organised_runs>/<year>/MiSEQ``.

        Returns:
            The path ``<organised_runs>/<year>/MiSEQ/<self.file>``.
        """
        # NOTE(review): _get_file_year is inherited; presumably it returns the
        # run's year as a string usable as a path component - confirm.
        year = self._get_file_year()
        machine = "MiSEQ"
        folder_for_run_path = os.path.join(self.organised_runs, year, machine)
        Path(folder_for_run_path).mkdir(parents=True, exist_ok=True)
        self._create_sample_dirs(folder_for_run_path)
        self._create_general_file(folder_for_run_path)
        self._create_patient_files_if_clinical_data_exist()
        return os.path.join(folder_for_run_path, self.file)

    def _collect_data_for_pseudo_number(self, new_folder, pseudo_number):
        """Copy the FASTQ files of one sample, then collect its analysis.

        Every file in ``<run>/Alignment_1/Fastq`` whose name contains
        ``pseudo_number`` is copied into ``<new_folder>/FASTQ``.

        Args:
            new_folder: Destination folder for this sample.
            pseudo_number: Substring identifying the sample's files.
        """
        fastq_folder = os.path.join(self.pseudo_run, self.file, "Alignment_1", "Fastq")
        new_fastq_folder = os.path.join(new_folder, "FASTQ")
        Path(new_fastq_folder).mkdir(parents=True, exist_ok=True)

        # os.listdir raises FileNotFoundError when the Fastq folder is absent;
        # that error is deliberately left to propagate to the caller.
        for file in os.listdir(fastq_folder):
            if pseudo_number in file:
                copy_if_exists(os.path.join(fastq_folder, file),
                               os.path.join(new_fastq_folder, file))

        self._collect_analysis(new_folder, pseudo_number)

    def _create_general_file(self, new_file_path):
        """Create ``<new_file_path>/<self.file>`` and copy run-level data into it.

        Args:
            new_file_path: Folder in which the run's general folder is created.
        """
        general_file_path = os.path.join(self.pseudo_run, self.file)
        new_general_file_path = os.path.join(new_file_path, self.file)
        Path(new_general_file_path).mkdir(parents=True, exist_ok=True)

        self._copy_important_files(general_file_path, new_general_file_path)
        self._copy_important_folders(general_file_path, new_general_file_path)

    def _copy_important_files(self, old_path, new_path):
        """Copy the run-level files from ``old_path`` flat into ``new_path``.

        Files listed under ``Alignment_1`` lose that prefix: only their base
        name is used at the destination.
        """
        files_to_move = [
            os.path.join("Alignment_1", "AnalysisLog.txt"),
            os.path.join("Alignment_1", "CompletedJobInfo.xml"),
            "RunParameters.xml",
            "RunInfo.xml",
            "SampleSheet.csv",
            "GenerateFASTQRunStatistics.xml",
        ]
        for file in files_to_move:
            copy_if_exists(os.path.join(old_path, file),
                           os.path.join(new_path, os.path.basename(file)))

    def _copy_important_folders(self, old_path, new_path):
        """Copy the run-level folders, renaming ``Alignment_1`` to ``Alignment``."""
        # (name in the source run, name in the organised run)
        folder_paths = [("Alignment_1", "Alignment"),
                        ("catalog_info_per_pred_number", "catalog_info_per_pred_number")]
        for old, new in folder_paths:
            copy_folder_if_exists(os.path.join(old_path, old),
                                  os.path.join(new_path, new))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from abc import ABC, abstractmethod | ||
|
||
|
||
class OrganiseRun(ABC):
    """Abstract interface implemented by every run organiser."""

    @abstractmethod
    def organise_run(self):
        """Organise a single run; concrete subclasses must override this."""
Oops, something went wrong.