Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1 add docker commands #2

Merged
merged 7 commits into from
Jun 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ tool_version: 1.0.0
variant_analysis: True
gene_analysis: True
disease_analysis: False
tool_specific_configuration_options:
tool_specific_configuration_options:
environment: apptainer # either apptainer or docker
1,204 changes: 624 additions & 580 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ packages = [{include = "pheval_ai_marrvel", from = "src"}]
[tool.poetry.dependencies]
python = "^3.10"
pheval = "^0.3.2"
docker = "^7.1.0"

[tool.poetry.scripts]
pheval-ai = "pheval_ai_marrvel.cli:main"
Expand Down
4 changes: 4 additions & 0 deletions src/pheval_ai_marrvel/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
VCF_FILE = "/input/vcf.gz"
HPO_TXT = "/input/hpo.txt"
DATA_DEPENDENCIES = "/run/data_dependencies"
OUTPUT_DIR = "/out"
152 changes: 152 additions & 0 deletions src/pheval_ai_marrvel/run/create_docker_commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from dataclasses import dataclass
from pathlib import Path
from typing import List

import docker
from docker import DockerClient
from pheval.utils.file_utils import all_files
from pheval.utils.phenopacket_utils import PhenopacketUtil, phenopacket_reader

from pheval_ai_marrvel.constants import DATA_DEPENDENCIES, HPO_TXT, OUTPUT_DIR, VCF_FILE


@dataclass
class AIMARRVELVolumes:
    """
    Bind-mount specifications handed to the AI MARRVEL container.

    Each attribute holds one "host_path:container_path" string, as built
    by create_volumes.

    Attributes:
        vcf_path (str): Mount string for the VCF file
        data_dependencies (str): Mount string for the data dependencies
        hpo_txt (str): Mount string for the hpo txt file
        output_dir (str): Mount string for the output directory
    """

    vcf_path: str
    data_dependencies: str
    hpo_txt: str
    output_dir: str


@dataclass
class SampleData:
    """
    Per-sample values extracted from a phenopacket.

    Attributes:
        sample_id (str): Identifier of the sample
        genome_assembly (str): Genome assembly recorded for the sample's VCF
        vcf_name (Path): Location of the sample's VCF file
    """

    sample_id: str
    genome_assembly: str
    vcf_name: Path


def get_sample_data(phenopacket_path: Path, vcf_dir: Path) -> SampleData:
    """
    Read a phenopacket and collect the values needed to run a sample.

    Args:
        phenopacket_path (Path): Path to phenopacket file
        vcf_dir (Path): Path to VCF directory

    Returns:
        SampleData: Sample ID, genome assembly and VCF location for the sample
    """
    phenopacket = phenopacket_reader(phenopacket_path)
    util = PhenopacketUtil(phenopacket)
    vcf_file = util.vcf_file_data(phenopacket_path, vcf_dir)
    # Evaluate in the same order as the keyword arguments below are consumed.
    sample_id = util.sample_id()
    assembly = vcf_file.file_attributes["genomeAssembly"]
    vcf_location = vcf_file.uri
    return SampleData(
        sample_id=sample_id,
        genome_assembly=assembly,
        vcf_name=vcf_location,
    )


def create_volumes(
    vcf_path: Path, data_dependencies: Path, hpo_txt: Path, output_dir: Path
) -> AIMARRVELVolumes:
    """
    Build the host-to-container mount strings for AI MARRVEL.

    Args:
        vcf_path (Path): Path to VCF file
        data_dependencies (Path): Path to data dependencies
        hpo_txt (Path): Path to hpo txt file
        output_dir (Path): Path to output directory

    Returns:
        AIMARRVELVolumes: One "host:container" string per mounted location
    """

    def _bind(host: Path, container_target: str) -> str:
        # "<host path>:<path inside the container>"
        return f"{host}:{container_target}"

    return AIMARRVELVolumes(
        vcf_path=_bind(vcf_path, VCF_FILE),
        data_dependencies=_bind(data_dependencies, DATA_DEPENDENCIES),
        hpo_txt=_bind(hpo_txt, HPO_TXT),
        output_dir=_bind(output_dir, OUTPUT_DIR),
    )


def create_docker_command(sample_data: SampleData, memory_limit: str = "30G") -> List[str]:
    """
    Create docker command to run AI MARRVEL.

    Args:
        sample_data (SampleData): The sample data
        memory_limit (str): Memory limit passed as the last argument to
            /run/proc.sh. Defaults to "30G", the value previously hard-coded.

    Returns:
        List[str]: The docker command to run AI MARRVEL, as an argument list
    """
    return [
        "/run/proc.sh",
        sample_data.sample_id,
        sample_data.genome_assembly,
        memory_limit,
    ]


def run_docker_sample(
    phenopacket_path: Path,
    vcf_dir: Path,
    data_dependencies: Path,
    hpo_txt: Path,
    output_dir: Path,
    client: DockerClient,
) -> None:
    """
    Run docker command for a sample and echo the container logs.

    Args:
        phenopacket_path (Path): Path to phenopacket file
        vcf_dir (Path): Path to VCF directory
        data_dependencies (Path): Path to data dependencies
        hpo_txt (Path): Path to hpo txt file
        output_dir (Path): Path to output directory
        client (DockerClient): Docker client
    """
    sample_data = get_sample_data(phenopacket_path, vcf_dir)
    docker_mounts = create_volumes(sample_data.vcf_name, data_dependencies, hpo_txt, output_dir)
    container = client.containers.run(
        "chaozhongliu/aim-lite",
        # Pass the argument list as-is: joining it into one string makes the
        # SDK re-split on whitespace, which breaks sample IDs containing spaces.
        create_docker_command(sample_data),
        volumes=[
            docker_mounts.vcf_path,
            docker_mounts.hpo_txt,
            docker_mounts.data_dependencies,
            docker_mounts.output_dir,
        ],
        detach=True,
    )
    for line in container.logs(stream=True):
        # Streamed log chunks are bytes; decode so the output is readable text
        # instead of a b'...' repr.
        print(line.decode("utf-8", errors="replace").strip())


def run_docker(testdata_dir: Path, input_dir: Path, output_dir: Path) -> None:
    """
    Run AI MARRVEL with docker on a corpus.

    Every phenopacket under ``testdata_dir/phenopackets`` is run as one
    sample, using ``testdata_dir/vcf`` as the VCF directory and
    ``testdata_dir/hpo_ids/<phenopacket stem>.txt`` as its HPO file.

    Args:
        testdata_dir (Path): Path to test data directory
        input_dir (Path): Path to input directory, mounted as the data dependencies
        output_dir (Path): Path to output directory
    """
    client = docker.from_env()
    vcf_dir = testdata_dir.joinpath("vcf")
    for phenopacket_path in all_files(testdata_dir.joinpath("phenopackets")):
        # Keyword arguments keep hpo_txt and output_dir from being swapped,
        # which would mount each one at the other's container path.
        run_docker_sample(
            phenopacket_path=phenopacket_path,
            vcf_dir=vcf_dir,
            data_dependencies=input_dir,
            hpo_txt=testdata_dir.joinpath(f"hpo_ids/{phenopacket_path.stem}.txt"),
            output_dir=output_dir,
            client=client,
        )
22 changes: 15 additions & 7 deletions src/pheval_ai_marrvel/run/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@
from pathlib import Path

from pheval_ai_marrvel.run.create_apptainer_commands import create_apptainer_commands
from pheval_ai_marrvel.run.create_docker_commands import run_docker


def run_commands(
tool_input_commands_dir: Path, testdata_dir: Path, input_dir: Path, output_dir: Path
tool_input_commands_dir: Path,
testdata_dir: Path,
input_dir: Path,
output_dir: Path,
environment: str,
) -> None:
"""
Run the apptainer commands.
Expand All @@ -16,9 +21,12 @@ def run_commands(
input_dir (Path): Path to the input directory.
output_dir (Path): Path to the output directory.
"""
create_apptainer_commands(tool_input_commands_dir, testdata_dir, input_dir, output_dir)
batch_file = tool_input_commands_dir.joinpath(f"{testdata_dir.name}_commands.txt")
subprocess.run(
["bash", str(batch_file)],
shell=False,
)
if environment == "apptainer":
create_apptainer_commands(tool_input_commands_dir, testdata_dir, input_dir, output_dir)
batch_file = tool_input_commands_dir.joinpath(f"{testdata_dir.name}_commands.txt")
subprocess.run(
["bash", str(batch_file)],
shell=False,
)
elif environment == "docker":
run_docker(testdata_dir, input_dir, output_dir)
5 changes: 5 additions & 0 deletions src/pheval_ai_marrvel/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pheval_ai_marrvel.post_process.post_process import post_process_results
from pheval_ai_marrvel.prepare.prepare import prepare_inputs
from pheval_ai_marrvel.run.run import run_commands
from pheval_ai_marrvel.tool_specific_configuration_options import AIMARRVELConfigurations


@dataclass
Expand All @@ -29,11 +30,15 @@ def run(self):
Run AI-MARRVEL to produce the raw output.
"""
print("running with AI-MARRVEL")
config = AIMARRVELConfigurations.parse_config(
self.input_dir_config.tool_specific_configuration_options
)
run_commands(
tool_input_commands_dir=self.tool_input_commands_dir,
testdata_dir=self.testdata_dir,
input_dir=self.input_dir,
output_dir=self.output_dir,
environment=config.environment,
)

def post_process(self):
Expand Down
12 changes: 12 additions & 0 deletions src/pheval_ai_marrvel/tool_specific_configuration_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from pydantic import BaseModel, Field


class AIMARRVELConfigurations(BaseModel):
    """
    Class for defining the AI MARRVEL configurations in the
    tool_specific_configuration_options field, within the input_dir config.yaml

    Attributes:
        environment (str): Environment to run AI MARRVEL, i.e., docker/apptainer
    """

    environment: str = Field(...)

    @classmethod
    def parse_config(cls, options):
        """
        Build the configuration from the tool_specific_configuration_options mapping.

        The runner calls this name, which pydantic's BaseModel does not
        provide itself.

        Args:
            options (dict): Mapping read from config.yaml. A missing or empty
                mapping fails validation because environment is required.

        Returns:
            AIMARRVELConfigurations: The validated configuration
        """
        return cls(**(options or {}))
Loading