Skip to content

Commit

Permalink
Merge pull request #2 from yaseminbridges/1-add-docker-commands
Browse files Browse the repository at this point in the history
1 add docker commands
  • Loading branch information
yaseminbridges authored Jun 2, 2024
2 parents d3186eb + 67098fe commit 02a62eb
Show file tree
Hide file tree
Showing 8 changed files with 815 additions and 588 deletions.
3 changes: 2 additions & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ tool_version: 1.0.0
variant_analysis: True
gene_analysis: True
disease_analysis: False
tool_specific_configuration_options:
tool_specific_configuration_options:
environment: apptainer # either apptainer or docker
1,204 changes: 624 additions & 580 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ packages = [{include = "pheval_ai_marrvel", from = "src"}]
[tool.poetry.dependencies]
python = "^3.10"
pheval = "^0.3.2"
docker = "^7.1.0"

[tool.poetry.scripts]
pheval-ai = "pheval_ai_marrvel.cli:main"
Expand Down
4 changes: 4 additions & 0 deletions src/pheval_ai_marrvel/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
VCF_FILE = "/input/vcf.gz"
HPO_TXT = "/input/hpo.txt"
DATA_DEPENDENCIES = "/run/data_dependencies"
OUTPUT_DIR = "/out"
152 changes: 152 additions & 0 deletions src/pheval_ai_marrvel/run/create_docker_commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from dataclasses import dataclass
from pathlib import Path
from typing import List

import docker
from docker import DockerClient
from pheval.utils.file_utils import all_files
from pheval.utils.phenopacket_utils import PhenopacketUtil, phenopacket_reader

from pheval_ai_marrvel.constants import DATA_DEPENDENCIES, HPO_TXT, OUTPUT_DIR, VCF_FILE


@dataclass
class AIMARRVELVolumes:
    """
    Volumes to mount to run AI MARRVEL.

    Each attribute is a single ``"<host path>:<container path>"`` string, which is
    the form produced by ``create_volumes`` and handed to the Docker client as a
    volume specification.

    Attributes:
        vcf_path (str): Mount string for the VCF file
        data_dependencies (str): Mount string for the data dependencies
        hpo_txt (str): Mount string for the hpo txt file
        output_dir (str): Mount string for the output directory
    """

    vcf_path: str
    data_dependencies: str
    hpo_txt: str
    output_dir: str


@dataclass
class SampleData:
    """
    Per-sample values needed to launch AI MARRVEL.

    Attributes:
        sample_id (str): Sample ID
        genome_assembly (str): Genome assembly
        vcf_name (Path): Location of the sample's VCF file. ``get_sample_data``
            fills this from the ``uri`` of the phenopacket's VCF file record, and
            ``run_docker_sample`` uses it as the host side of the VCF mount.
    """

    sample_id: str
    genome_assembly: str
    vcf_name: Path


def get_sample_data(phenopacket_path: Path, vcf_dir: Path) -> SampleData:
    """
    Collect the sample ID, genome assembly and VCF location for one phenopacket.

    Args:
        phenopacket_path (Path): Path to phenopacket file
        vcf_dir (Path): Path to VCF directory
    Returns:
        SampleData: The sample data read from the phenopacket
    """
    phenopacket = phenopacket_reader(phenopacket_path)
    util = PhenopacketUtil(phenopacket)
    vcf_record = util.vcf_file_data(phenopacket_path, vcf_dir)

    # Same evaluation order as before: sample ID first, then the assembly lookup.
    identifier = util.sample_id()
    assembly = vcf_record.file_attributes["genomeAssembly"]
    return SampleData(
        sample_id=identifier,
        genome_assembly=assembly,
        vcf_name=vcf_record.uri,
    )


def create_volumes(
    vcf_path: Path, data_dependencies: Path, hpo_txt: Path, output_dir: Path
) -> AIMARRVELVolumes:
    """
    Build the ``host:container`` mount strings used to run AI MARRVEL.

    Args:
        vcf_path (Path): Path to VCF file
        data_dependencies (Path): Path to data dependencies
        hpo_txt (Path): Path to hpo txt file
        output_dir (Path): Path to output directory
    Returns:
        AIMARRVELVolumes: The volumes to mount, each pairing a host path with its
            fixed in-container target
    """

    def _bind(host: Path, target: str) -> str:
        # Join the host path and the container path with a single colon.
        return ":".join((str(host), target))

    vcf_mount = _bind(vcf_path, VCF_FILE)
    dependencies_mount = _bind(data_dependencies, DATA_DEPENDENCIES)
    hpo_mount = _bind(hpo_txt, HPO_TXT)
    output_mount = _bind(output_dir, OUTPUT_DIR)
    return AIMARRVELVolumes(
        vcf_path=vcf_mount,
        data_dependencies=dependencies_mount,
        hpo_txt=hpo_mount,
        output_dir=output_mount,
    )


def create_docker_command(sample_data: SampleData, memory_limit: str = "30G") -> List[str]:
    """
    Create docker command to run AI MARRVEL.

    Args:
        sample_data (SampleData): The sample data
        memory_limit (str): Final argument passed to ``/run/proc.sh`` (the memory
            limit in AI MARRVEL's documented usage). Defaults to ``"30G"``, the
            value that was previously hard-coded.
    Returns:
        List[str]: The command, as an argument list, to run inside the
            AI MARRVEL container
    """
    return [
        "/run/proc.sh",
        sample_data.sample_id,
        sample_data.genome_assembly,
        memory_limit,
    ]


def run_docker_sample(
    phenopacket_path: Path,
    vcf_dir: Path,
    data_dependencies: Path,
    hpo_txt: Path,
    output_dir: Path,
    client: DockerClient,
) -> None:
    """
    Run the AI MARRVEL container for a single sample and echo its log output.

    Args:
        phenopacket_path (Path): Path to phenopacket file
        vcf_dir (Path): Path to VCF directory
        data_dependencies (Path): Path to data dependencies
        hpo_txt (Path): Path to hpo txt file
        output_dir (Path): Path to output directory
        client (DockerClient): Docker client
    """
    sample_data = get_sample_data(phenopacket_path, vcf_dir)
    docker_mounts = create_volumes(sample_data.vcf_name, data_dependencies, hpo_txt, output_dir)
    # create_volumes always returns four strings, so no None-filtering is needed.
    volumes = [
        docker_mounts.vcf_path,
        docker_mounts.hpo_txt,
        docker_mounts.data_dependencies,
        docker_mounts.output_dir,
    ]
    container = client.containers.run(
        "chaozhongliu/aim-lite",
        # Pass the argument list as-is: joining it into one string makes the SDK
        # re-split on whitespace, which breaks any argument containing a space.
        create_docker_command(sample_data),
        volumes=volumes,
        detach=True,
    )
    # Streaming the logs blocks until the container stops producing output.
    for line in container.logs(stream=True):
        # The log stream yields bytes; decode so output is text, not a b'...' repr.
        text = line.decode("utf-8", errors="replace") if isinstance(line, bytes) else str(line)
        print(text.strip())


def run_docker(testdata_dir: Path, input_dir: Path, output_dir: Path) -> None:
    """
    Run AI MARRVEL with docker on a corpus.

    Every file under ``<testdata_dir>/phenopackets`` is processed in turn, using
    ``<testdata_dir>/vcf`` as the VCF directory and
    ``<testdata_dir>/hpo_ids/<phenopacket stem>.txt`` as the sample's HPO file.

    Args:
        testdata_dir (Path): Path to test data directory
        input_dir (Path): Path to input directory (mounted as the data dependencies)
        output_dir (Path): Path to output directory
    """
    client = docker.from_env()
    vcf_dir = testdata_dir.joinpath("vcf")
    for phenopacket_path in all_files(testdata_dir.joinpath("phenopackets")):
        # Keyword arguments are deliberate: the previous positional call passed
        # output_dir as hpo_txt and the HPO file as output_dir, swapping the mounts.
        run_docker_sample(
            phenopacket_path=phenopacket_path,
            vcf_dir=vcf_dir,
            data_dependencies=input_dir,
            hpo_txt=testdata_dir.joinpath(f"hpo_ids/{phenopacket_path.stem}.txt"),
            output_dir=output_dir,
            client=client,
        )
22 changes: 15 additions & 7 deletions src/pheval_ai_marrvel/run/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@
from pathlib import Path

from pheval_ai_marrvel.run.create_apptainer_commands import create_apptainer_commands
from pheval_ai_marrvel.run.create_docker_commands import run_docker


def run_commands(
tool_input_commands_dir: Path, testdata_dir: Path, input_dir: Path, output_dir: Path
tool_input_commands_dir: Path,
testdata_dir: Path,
input_dir: Path,
output_dir: Path,
environment: str,
) -> None:
"""
Run the apptainer commands.
Expand All @@ -16,9 +21,12 @@ def run_commands(
input_dir (Path): Path to the input directory.
output_dir (Path): Path to the output directory.
"""
create_apptainer_commands(tool_input_commands_dir, testdata_dir, input_dir, output_dir)
batch_file = tool_input_commands_dir.joinpath(f"{testdata_dir.name}_commands.txt")
subprocess.run(
["bash", str(batch_file)],
shell=False,
)
if environment == "apptainer":
create_apptainer_commands(tool_input_commands_dir, testdata_dir, input_dir, output_dir)
batch_file = tool_input_commands_dir.joinpath(f"{testdata_dir.name}_commands.txt")
subprocess.run(
["bash", str(batch_file)],
shell=False,
)
elif environment == "docker":
run_docker(testdata_dir, input_dir, output_dir)
5 changes: 5 additions & 0 deletions src/pheval_ai_marrvel/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pheval_ai_marrvel.post_process.post_process import post_process_results
from pheval_ai_marrvel.prepare.prepare import prepare_inputs
from pheval_ai_marrvel.run.run import run_commands
from pheval_ai_marrvel.tool_specific_configuration_options import AIMARRVELConfigurations


@dataclass
Expand All @@ -29,11 +30,15 @@ def run(self):
Run AI-MARRVEL to produce the raw output.
"""
print("running with AI-MARRVEL")
config = AIMARRVELConfigurations.parse_config(
self.input_dir_config.tool_specific_configuration_options
)
run_commands(
tool_input_commands_dir=self.tool_input_commands_dir,
testdata_dir=self.testdata_dir,
input_dir=self.input_dir,
output_dir=self.output_dir,
environment=config.environment,
)

def post_process(self):
Expand Down
12 changes: 12 additions & 0 deletions src/pheval_ai_marrvel/tool_specific_configuration_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from pydantic import BaseModel, Field


class AIMARRVELConfigurations(BaseModel):
    """
    Class for defining the AI MARRVEL configurations in the
    tool_specific_configuration_options field, within the input_dir config.yaml
    Args:
        environment (str): Environment to run AI MARRVEL, i.e., docker/apptainer
    """

    # Required (no default): the config must state which runtime to use.
    environment: str = Field(...)

0 comments on commit 02a62eb

Please sign in to comment.