-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Tomáš Houfek
committed
Dec 12, 2024
1 parent
fdb8a19
commit b8b9fdc
Showing
154 changed files
with
387 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
FROM bitnami/python:3.10

# WORKDIR creates the directory when it does not exist, so no separate
# `RUN mkdir` layer is needed.
WORKDIR /organisation-app

# Install dependencies before copying the sources so this layer is reused
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# COPY instead of ADD: only plain local files/directories are copied here and
# none of ADD's extras (archive extraction, remote URLs) are wanted.
COPY main.py .
COPY organiser/ organiser/
COPY tests/ tests/

# Run the application under a non-root UID.
USER 1001
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,16 @@ | ||
version: '3.0'
services:
  run:
    # Build from the development Dockerfile in this directory.
    build:
      context: .
      dockerfile: Dockerfile.dev
    # Host folders mounted at the paths passed to main.py below.
    volumes:
      - /home/houfek/Work/MMCI/sequencing_pipeline/data-catalogue-playground/muni-sc/PseudonymizedRuns:/PseudonymizedRuns
      - /home/houfek/Work/MMCI/sequencing_pipeline/data-catalogue-playground/muni-sc/NextSeqTemp/:/NextSeqTemp
      - /home/houfek/Work/MMCI/sequencing_pipeline/data-catalogue-playground/muni-sc/OrganisedRuns/:/OrganisedRuns
      - /home/houfek/Work/MMCI/sequencing_pipeline/data-catalogue-playground/muni-sc/Patients/:/Patients
    command: bash -c "python main.py
      -r /PseudonymizedRuns/
      -n /NextSeqTemp/
      -o /OrganisedRuns/
      -p /Patients/"
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from organiser.helpers.file_helpers import copy_if_exists, copy_folder_if_exists | ||
from organiser.run_organisers.old_miseq_organise_run import OldMiseqRunOrganiser | ||
import os | ||
from pathlib import Path | ||
|
||
class NextSeqRunOrganiser(OldMiseqRunOrganiser):
    """Organise one pseudonymized NextSeq run under ``<organised_runs>/<year>/NextSeq``.

    Only the NextSeq-specific steps are overridden here: which run-level
    files and folders are copied, and how FASTQ files are collected for a
    sample. Sample-directory creation and patient-file creation are
    inherited from ``OldMiseqRunOrganiser``.

    NOTE(review): ``self.pseudo_run``, ``self.organised_runs`` and
    ``self.file`` are set by the parent class, which is not visible here.
    They are presumably the folder holding pseudonymized runs, the
    destination root and the run folder name; confirm against the parent.
    """

    def organise_run(self):
        """Build the organised folder structure for the run.

        Returns:
            The path ``<organised_runs>/<year>/NextSeq/<self.file>``.
        """
        # NOTE(review): _get_file_year() is inherited; it must return a string
        # because it is used directly as a path component.
        year = self._get_file_year()
        machine = "NextSeq"
        folder_for_run_path = os.path.join(self.organised_runs, year, machine)
        Path(folder_for_run_path).mkdir(parents=True, exist_ok=True)

        self._create_sample_dirs(folder_for_run_path)
        self._create_general_file(folder_for_run_path)
        self._create_patient_files_if_clinical_data_exist()
        return os.path.join(folder_for_run_path, self.file)

    def _create_general_file(self, new_file_path):
        """Copy the run-level files and folders into ``new_file_path/<self.file>``."""
        source_run = os.path.join(self.pseudo_run, self.file)
        target_run = os.path.join(new_file_path, self.file)
        self._copy_important_files(source_run, target_run)
        self._copy_important_folders(source_run, target_run)

    def _collect_data_for_pseudo_number(self, new_folder, pseudo_number):
        """Copy the run's FASTQ files whose name contains ``pseudo_number``.

        Files are taken from ``<pseudo_run>/<self.file>/FASTQ`` and copied to
        ``<new_folder>/FASTQ``. Nothing is created when the run has no FASTQ
        folder.
        """
        source_fastq_dir = os.path.join(self.pseudo_run, self.file, "FASTQ")
        if not os.path.exists(source_fastq_dir):
            return

        target_fastq_dir = os.path.join(new_folder, "FASTQ")
        # exist_ok=True keeps a repeated call from failing with
        # FileExistsError, matching how organise_run() creates its folder.
        # parents is left False on purpose: a missing ``new_folder`` still
        # raises, exactly as os.mkdir did.
        Path(target_fastq_dir).mkdir(exist_ok=True)

        for file_name in os.listdir(source_fastq_dir):
            if pseudo_number in file_name:
                copy_if_exists(os.path.join(source_fastq_dir, file_name),
                               os.path.join(target_fastq_dir, file_name))

    def _copy_important_files(self, old_path, new_path):
        """Copy the run description files from ``old_path`` to ``new_path``."""
        files_to_move = ("RunInfo.xml", "RunParameters.xml", "RunCompletionStatus.xml", "SampleSheet.csv")

        for file_name in files_to_move:
            copy_if_exists(os.path.join(old_path, file_name),
                           os.path.join(new_path, file_name))

    def _copy_important_folders(self, old_path, new_path):
        """Copy the run-level folders from ``old_path`` to ``new_path``."""
        folders_to_copy = (
            "Data",
            "catalog_info_per_pred_number",
        )
        for folder_name in folders_to_copy:
            copy_folder_if_exists(os.path.join(old_path, folder_name),
                                  os.path.join(new_path, folder_name))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
import pytest | ||
import os | ||
import shutil | ||
|
||
from organiser.run_organisers.nextseq_organise_run import NextSeqRunOrganiser | ||
|
||
# Directory of this test module; every fixture path below hangs off it.
_TESTS_DIR = os.path.dirname(__file__)

# Template run that is copied into the scratch location for each test.
FAKE_RUN_FOR_COPY = os.path.join(_TESTS_DIR, "test_pseudonymized_runs", "230101_N0000000_0000_0000000000")

# Scratch locations used by the tests.
FAKE_ALL_RUNS_FOR_TESTING = os.path.join(_TESTS_DIR, "FAKE_PSEUDONYMIZED_RUNS")
FAKE_RUN_FOR_TESTING = os.path.join(FAKE_ALL_RUNS_FOR_TESTING, "230101_N0000000_0000_0000000000")
FAKE_DESTINATION_FILES = os.path.join(_TESTS_DIR, "test_destination")
FAKE_PATIENT_FILES = os.path.join(_TESTS_DIR, "test_patients")
|
||
def _copy_fake_run():
    """Copy the template run to the scratch location and create empty output dirs."""
    # copytree also creates the missing parent directory of the destination.
    shutil.copytree(src=FAKE_RUN_FOR_COPY, dst=FAKE_RUN_FOR_TESTING)
    for empty_dir in (FAKE_DESTINATION_FILES, FAKE_PATIENT_FILES):
        os.mkdir(empty_dir)
|
||
def _remove_coppied_fake_run():
    """Delete the scratch runs, patients and destination directories."""
    for scratch_dir in (FAKE_ALL_RUNS_FOR_TESTING, FAKE_PATIENT_FILES, FAKE_DESTINATION_FILES):
        shutil.rmtree(scratch_dir)
|
||
@pytest.fixture(autouse=True)
def setup_and_teardown_organise_files(request):
    """Give every test a fresh scratch run and remove it again afterwards."""
    # ``request`` is kept in the signature although teardown is now expressed
    # with ``yield`` instead of ``request.addfinalizer``.
    _copy_fake_run()
    yield
    _remove_coppied_fake_run()
|
||
|
||
@pytest.fixture
def remove_fastq_folders():
    """Delete the FASTQ folder from the scratch run before the test body runs."""
    fastq_dir = os.path.join(FAKE_RUN_FOR_TESTING, "FASTQ")
    shutil.rmtree(fastq_dir)
|
||
|
||
def test_run_is_in_correct_sturecture():
    """The organised run ends up in ``<destination>/2023/NextSeq/<run name>``."""
    run_organiser = NextSeqRunOrganiser(
        FAKE_ALL_RUNS_FOR_TESTING,
        "230101_N0000000_0000_0000000000",
        FAKE_DESTINATION_FILES,
        FAKE_PATIENT_FILES,
    )
    run_organiser.organise_run()

    expected_run_dir = os.path.join(FAKE_DESTINATION_FILES, "2023", "NextSeq", "230101_N0000000_0000_0000000000")
    assert os.path.exists(expected_run_dir)
|
||
def test_data_folder_structred():
    """``Data/Intensities`` is copied with ``BaseCalls`` and lanes L001-L004."""
    run_organiser = NextSeqRunOrganiser(
        FAKE_ALL_RUNS_FOR_TESTING,
        "230101_N0000000_0000_0000000000",
        FAKE_DESTINATION_FILES,
        FAKE_PATIENT_FILES,
    )
    run_organiser.organise_run()

    intensities_dir = os.path.join(FAKE_DESTINATION_FILES, "2023", "NextSeq", "230101_N0000000_0000_0000000000",
                                   "Data", "Intensities")
    assert os.path.exists(os.path.join(intensities_dir, "BaseCalls"))
    for lane in range(1, 5):
        lane_dir = os.path.join(intensities_dir, f"L00{lane}")
        assert os.path.exists(lane_dir)
|
||
def test_samples_folder_contains_fastq_files():
    """Organising the run creates the ``Samples`` folder inside the run folder."""
    run_organiser = NextSeqRunOrganiser(
        FAKE_ALL_RUNS_FOR_TESTING,
        "230101_N0000000_0000_0000000000",
        FAKE_DESTINATION_FILES,
        FAKE_PATIENT_FILES,
    )
    run_organiser.organise_run()

    samples_dir = os.path.join(FAKE_DESTINATION_FILES, "2023", "NextSeq", "230101_N0000000_0000_0000000000",
                               "Samples")
    assert os.path.exists(samples_dir)
|
||
def test_individual_fastq_files():
    """Each of the 8 samples gets DNA and RNA FASTQ folders; DNA holds 8 ``fastq.gz`` files."""
    run_organiser = NextSeqRunOrganiser(
        FAKE_ALL_RUNS_FOR_TESTING,
        "230101_N0000000_0000_0000000000",
        FAKE_DESTINATION_FILES,
        FAKE_PATIENT_FILES,
    )
    run_organiser.organise_run()

    # The Samples folder does not depend on the sample index, so build it once.
    samples_dir = os.path.join(FAKE_DESTINATION_FILES, "2023", "NextSeq", "230101_N0000000_0000_0000000000",
                               "Samples")
    for sample_index in range(8):
        dna_fastq_dir = os.path.join(samples_dir, f"2023_000{sample_index}_DNA", "FASTQ")
        rna_fastq_dir = os.path.join(samples_dir, f"2023_000{sample_index}_RNA", "FASTQ")

        assert os.path.exists(dna_fastq_dir)
        assert os.path.exists(rna_fastq_dir)

        dna_fastq_files = os.listdir(dna_fastq_dir)
        assert all(file_name.endswith("fastq.gz") for file_name in dna_fastq_files)
        assert len(dna_fastq_files) == 8
|
||
def test_missing_fastq_files(remove_fastq_folders):
    """Without a source FASTQ folder the sample dirs exist but hold no FASTQ folder."""
    run_organiser = NextSeqRunOrganiser(
        FAKE_ALL_RUNS_FOR_TESTING,
        "230101_N0000000_0000_0000000000",
        FAKE_DESTINATION_FILES,
        FAKE_PATIENT_FILES,
    )
    run_organiser.organise_run()

    samples_dir = os.path.join(FAKE_DESTINATION_FILES, "2023", "NextSeq", "230101_N0000000_0000_0000000000",
                               "Samples")
    missing_fastq_dir = os.path.join(samples_dir, "2023_0000_DNA", "FASTQ")
    assert not os.path.exists(missing_fastq_dir)
    for sample_index in range(8):
        for material in ("DNA", "RNA"):
            assert os.path.exists(os.path.join(samples_dir, f"2023_000{sample_index}_{material}"))
|
||
|
||
@pytest.mark.parametrize("filename", ["SampleSheet.csv", "RunParameters.xml", "RunInfo.xml", "RunCompletionStatus.xml"])
def test_individual_nextseq_files(filename):
    """Each run-level description file is copied into the organised run folder."""
    run_organiser = NextSeqRunOrganiser(
        FAKE_ALL_RUNS_FOR_TESTING,
        "230101_N0000000_0000_0000000000",
        FAKE_DESTINATION_FILES,
        FAKE_PATIENT_FILES,
    )
    run_organiser.organise_run()

    copied_file = os.path.join(FAKE_DESTINATION_FILES, "2023", "NextSeq", "230101_N0000000_0000_0000000000",
                               filename)
    assert os.path.exists(copied_file)
|
||
|
||
def test_catalogue_info_pred_number():
    """The ``catalog_info_per_pred_number`` folder is copied with the run."""
    run_organiser = NextSeqRunOrganiser(
        FAKE_ALL_RUNS_FOR_TESTING,
        "230101_N0000000_0000_0000000000",
        FAKE_DESTINATION_FILES,
        FAKE_PATIENT_FILES,
    )
    run_organiser.organise_run()

    catalog_info_dir = os.path.join(FAKE_DESTINATION_FILES, "2023", "NextSeq",
                                    "230101_N0000000_0000_0000000000", "catalog_info_per_pred_number")
    assert os.path.exists(catalog_info_dir)
|
||
def test_catalog_info_missing_no_error(remove_catalog_info_per_pred_number):
    """A run without ``catalog_info_per_pred_number`` is organised without that folder."""
    run_organiser = NextSeqRunOrganiser(
        FAKE_ALL_RUNS_FOR_TESTING,
        "230101_N0000000_0000_0000000000",
        FAKE_DESTINATION_FILES,
        FAKE_PATIENT_FILES,
    )
    run_organiser.organise_run()

    catalog_info_dir = os.path.join(FAKE_DESTINATION_FILES, "2023", "NextSeq",
                                    "230101_N0000000_0000_0000000000", "catalog_info_per_pred_number")
    assert not os.path.exists(catalog_info_dir)
|
||
|
||
def test_patient_correctly_created_in_tree():
    """Organising the run writes ``patient_metadata.json`` into the patients tree."""
    run_organiser = NextSeqRunOrganiser(
        FAKE_ALL_RUNS_FOR_TESTING,
        "230101_N0000000_0000_0000000000",
        FAKE_DESTINATION_FILES,
        FAKE_PATIENT_FILES,
    )
    run_organiser.organise_run()

    patient_metadata = os.path.join(FAKE_PATIENT_FILES, "2000", "mmci_patient_00000000-0000-0000-0000-000000000001",
                                    "patient_metadata.json")
    assert os.path.exists(patient_metadata)
|
||
|
||
@pytest.fixture
def remove_catalog_info_per_pred_number():
    """Delete ``catalog_info_per_pred_number`` from the scratch run before the test body runs."""
    catalog_info_dir = os.path.join(FAKE_RUN_FOR_TESTING, "catalog_info_per_pred_number")
    shutil.rmtree(catalog_info_dir)
Oops, something went wrong.