Skip to content

Commit

Permalink
Added docker and docker compose
Browse files Browse the repository at this point in the history
  • Loading branch information
Tomáš Houfek committed Jul 24, 2024
1 parent ead7698 commit 196aa72
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 43 deletions.
16 changes: 16 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Image for the pseudonymization pipeline. Run without arguments it executes
# the test suite; compose.yml overrides the command to run the pipeline.
FROM bitnami/python:3.10

# WORKDIR creates the directory when it does not exist, so no separate
# `RUN mkdir` layer is needed.
WORKDIR /pseudo-app

# Install dependencies before copying the sources so this layer is rebuilt
# only when requirements.txt changes. --no-cache-dir keeps pip's download
# cache out of the image.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# COPY rather than ADD: these are plain local files/directories and need
# neither ADD's archive auto-extraction nor its remote-URL handling.
COPY run_pseudonymization_pipeline.py .
COPY pseudonymization/ pseudonymization/
COPY tests/ tests/

# Run as the non-root UID 1001.
# NOTE(review): the copied files stay owned by the build user; if the tests
# write inside /pseudo-app/tests they may need `COPY --chown=1001` — confirm.
USER 1001

CMD ["pytest", "tests"]
18 changes: 18 additions & 0 deletions compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Builds the image from the local Dockerfile and runs the MiSEQ
# pseudonymization pipeline against bind-mounted data folders.
#
# PSEUDO_DATA_ROOT selects the host directory holding the data folders; when
# unset it falls back to the path that was previously hard-coded here.
version: '3.0'
services:
  run:
    build: .
    volumes:
      - "${PSEUDO_DATA_ROOT:-/home/houfek/Work/MMCI/sequencing_pipeline/data-catalogue-playground}/muni-sc/:/sc"
      - "${PSEUDO_DATA_ROOT:-/home/houfek/Work/MMCI/sequencing_pipeline/data-catalogue-playground}/seq/TRANSFER/:/TRANSFER"
      - "${PSEUDO_DATA_ROOT:-/home/houfek/Work/MMCI/sequencing_pipeline/data-catalogue-playground}/pseudonymisation_table/:/pseudonymization_tables/"
      - "${PSEUDO_DATA_ROOT:-/home/houfek/Work/MMCI/sequencing_pipeline/data-catalogue-playground}/Libraries/:/libraries"
    # Lets the container reach services on the Docker host via the name
    # host.docker.internal (used by pseudonymization/helpers/config.cfg).
    extra_hosts:
      - "host.docker.internal:host-gateway"
    # Overrides the image's default CMD (pytest) and runs the pipeline.
    # NOTE(review): the positional arguments presumably map, in order, to
    # source folder, destination folder, pseudonymization tables folder,
    # sequencing libraries, libraries destination and run type — confirm
    # against the argparse setup in run_pseudonymization_pipeline.py.
    command: bash -c "python run_pseudonymization_pipeline.py
      /TRANSFER
      /sc/MiSEQ
      /pseudonymization_tables
      /libraries
      /sc/Libraries
      'MiSEQ'"
4 changes: 2 additions & 2 deletions pseudonymization/helpers/config.cfg
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[miseq-config]
# NOTE(review): this diff view lists each key twice (the hunk reports 2
# additions and 2 deletions). The localhost pair is presumably the removed
# version and the host.docker.internal pair the added one — confirm against
# the committed file, which should contain each key only once.
PSEUDONYMIZATION-API = http://localhost:8088/api
EXPORT-API = http://localhost:8080/api
# host.docker.internal is mapped to the Docker host gateway by the
# extra_hosts entry in compose.yml.
PSEUDONYMIZATION-API = http://host.docker.internal:8088/api
EXPORT-API = http://host.docker.internal:8080/api
2 changes: 1 addition & 1 deletion pseudonymization/process/pseudonimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def __call__(self):
pred_pseudo_tuples = self._get_all_predictive_numbers_pseudonymize_sample_sheet()
for pred, pseudo in pred_pseudo_tuples:
self._pseudonymize_file_names_recursively(pred, pseudo, self.run_path)
self._try_pseudonimize_content_of_files(pred, pseudo)
self._try_pseudonimize_content_of_files(pred, pseudo)  # Must run after _pseudonymize_file_names_recursively
clinical_data = ClinicalInfoFinder(self.run_path).collect_clinical_data(pred)

if clinical_data:
Expand Down
15 changes: 11 additions & 4 deletions run_pseudonymization_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@
from pseudonymization.process.pseudonimizer import RunPseudonimizer


def pseudonymize_miseq_runs(source_folder, destination_folder, pseudo_samples_folder, sequencing_libraries):
def pseudonymize_miseq_runs(source_folder, destination_folder, pseudo_samples_folder, sequencing_libraries, sc_libraries):
shutil.copytree(sequencing_libraries, sc_libraries, dirs_exist_ok=True)
for run in os.listdir(source_folder):
run_path = os.path.join(source_folder, run)
remove_miseq_run_files(run_path)
RunPseudonimizer(run_path, pseudo_samples_folder)()
mv_if_source_not_exist(run_path, os.path.join(destination_folder, run))


def pseudonymize_nextseq_runs(source_folder, destination_folder, pseudo_samples_folder, sequencing_libraries):
def pseudonymize_nextseq_runs(source_folder, destination_folder, pseudo_samples_folder, sequencing_libraries, sc_libraries):
shutil.copytree(sequencing_libraries, sc_libraries, dirs_exist_ok=True)
for run in os.listdir(source_folder):
run_path = os.path.join(source_folder, run)
remove_nextseq_run_files(run_path)
Expand All @@ -35,6 +37,11 @@ def pseudonymize_nextseq_runs(source_folder, destination_folder, pseudo_samples_
args = parser.parse_args()

if args.run_type == "MiSEQ":
pseudonymize_miseq_runs(args.source_folder, args.destination_folder, args.pseudo_tables_folder, args.sequencing_libraries)
pseudonymize_miseq_runs(args.source_folder,
args.destination_folder,
args.pseudo_tables_folder,
args.sequencing_libraries,
args.sequencing_libraries_sc)
print("DONE")
elif args.run_type == "NextSeq":
pseudonymize_nextseq_runs(args.source_folder, args.destination_folder, args.pseudo_tables_folder, args.sequencing_libraries)
pseudonymize_nextseq_runs(args.source_folder, args.destination_folder, args.pseudo_tables_folder, args.sequencing_libraries, args.sequencing_libraries_sc)
5 changes: 2 additions & 3 deletions tests/test_pseudonimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,12 +171,11 @@ def test_save_clinical_data(mocker,

pat = Patient("mmci_patient_12345678-1234-5678-1234-567812345621", "Fri, 1 Mar 1990 00:00:00 GMT", 1, samples)

folder_for_clinical_data = os.path.join(FAKE_RUN_FOLDER_FOR_COPY, "catalog_info_per_pred_number")
folder_for_clinical_data = os.path.join(FAKE_RUN_FOR_TESTING, "catalog_info_per_pred_number")
pseudo_pred_number = "mmci_patient_12345678-1234-5678-1234-567812345688"

pseudonimizer = RunPseudonimizer(FAKE_RUN_FOR_TESTING, PSEUDONYMIZATION_FILES_FOLDER)
pseudonimizer._save_clinical_data(pat, folder_for_clinical_data, pseudo_pred_number)


assert os.path.exists(os.path.join(FAKE_RUN_FOLDER_FOR_COPY, "catalog_info_per_pred_number", f"{pseudo_pred_number}.json"))

assert os.path.exists(os.path.join(FAKE_RUN_FOR_TESTING, "catalog_info_per_pred_number", f"{pseudo_pred_number}.json"))

This file was deleted.

0 comments on commit 196aa72

Please sign in to comment.