Skip to content

Commit

Permalink
add docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
yaseminbridges committed May 12, 2024
1 parent 562ad4d commit d3186eb
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/pheval_ai_marrvel/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

@click.group()
def main():
    """AI-MARRVEL runner."""
    # Command group entry point; subcommands are attached to it below via main.add_command.


main.add_command(post_process)
Expand Down
14 changes: 14 additions & 0 deletions src/pheval_ai_marrvel/post_process/post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@


def post_process_results(raw_results_dir: Path, output_dir: Path) -> None:
    """
    Turn AI-MARRVEL raw results into standardised PhEval TSV results.

    Delegates to ``create_standardised_results``.

    Args:
        raw_results_dir (Path): Path to the raw results directory.
        output_dir (Path): Path to the output directory.
    """
    create_standardised_results(raw_results_dir=raw_results_dir, output_dir=output_dir)


Expand All @@ -21,4 +28,11 @@ def post_process_results(raw_results_dir: Path, output_dir: Path) -> None:
type=Path,
)
def post_process(raw_results_dir: Path, output_dir: Path) -> None:
    """
    Post-process AI-MARRVEL raw results and create standardised PhEval TSV results.

    Args:
        raw_results_dir (Path): Path to the raw results directory.
        output_dir (Path): Path to the output directory.
    """
    # Thin command wrapper: the work is done by post_process_results.
    post_process_results(raw_results_dir=raw_results_dir, output_dir=output_dir)
40 changes: 38 additions & 2 deletions src/pheval_ai_marrvel/post_process/post_process_results_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def read_raw_result(raw_result_path: Path) -> pl.DataFrame:
raw_result_path(Path): Path to the raw result file.
Returns:
List[dict]: Contents of the raw result file.
pl.DataFrame: Contents of the raw result file.
"""
raw_result = pl.read_csv(raw_result_path)
raw_result = raw_result.rename({"Unnamed: 0": "variant"})
Expand Down Expand Up @@ -81,18 +81,54 @@ def obtain_gene_identifier(self, result_entry: dict) -> str:

@staticmethod
def obtain_chrom(variant_str: str) -> str:
    """
    Extract the chromosome from a variant entry.

    Args:
        variant_str (str): Variant entry made of hyphen-separated fields.

    Returns:
        str: The first hyphen-separated field (the chromosome).
    """
    # Everything before the first "-" (or the whole string when there is none).
    chrom, _, _ = variant_str.partition("-")
    return chrom

@staticmethod
def obtain_pos(variant_str: str) -> int:
    """
    Extract the position from a variant entry.

    Args:
        variant_str (str): Variant entry made of hyphen-separated fields.

    Returns:
        int: The second hyphen-separated field converted to an integer (the position).
    """
    fields = variant_str.split("-")
    position_field = fields[1]
    return int(position_field)

@staticmethod
def obtain_ref(variant_str: str) -> str:
    """
    Extract the reference allele from a variant entry.

    Args:
        variant_str (str): Variant entry made of hyphen-separated fields.

    Returns:
        str: The third hyphen-separated field (the reference allele).
    """
    fields = variant_str.split("-")
    ref_allele = fields[2]
    return ref_allele

@staticmethod
def obtain_alt(variant_str: str) -> str:
    """
    Extract the alternate allele from a variant entry.

    Args:
        variant_str (str): Variant entry made of hyphen-separated fields.

    Returns:
        str: The fourth hyphen-separated field (the alternate allele).
    """
    fields = variant_str.split("-")
    alt_allele = fields[3]
    return alt_allele

def extract_pheval_gene_requirements(self) -> List[PhEvalGeneResult]:
Expand Down Expand Up @@ -140,7 +176,7 @@ def extract_pheval_variant_requirements(self) -> List[PhEvalVariantResult]:

def create_standardised_results(raw_results_dir: Path, output_dir: Path) -> None:
"""
Create PhEval gene tsv output from raw results.
Create PhEval gene and variant tsv output from raw results.
Args:
raw_results_dir (Path): Path to the raw results directory.
Expand Down
6 changes: 6 additions & 0 deletions src/pheval_ai_marrvel/prepare/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,11 @@


def prepare_inputs(testdata_dir: Path) -> None:
    """
    Prepare input files for AI-MARRVEL prediction from phenopackets.

    Args:
        testdata_dir (Path): Path to the test data directory.
    """
    # Make sure the "hpo_ids" sub-directory exists before the txt files are written.
    hpo_ids_dir = testdata_dir / "hpo_ids"
    hpo_ids_dir.mkdir(exist_ok=True)
    write_input_txt_files(testdata_dir)
29 changes: 29 additions & 0 deletions src/pheval_ai_marrvel/prepare/prepare_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,42 @@


def obtain_observed_hpo_ids(phenopacket: Phenopacket) -> str:
    """
    Collect the observed HPO ids of a Phenopacket as newline-separated text.

    Args:
        phenopacket (Phenopacket): Phenopacket object.

    Returns:
        str: The ids of the observed phenotypic features, one per line.
    """
    phenotypic_features = PhenopacketUtil(phenopacket).observed_phenotypic_features()
    return "\n".join([feature.type.id for feature in phenotypic_features])


def write_txt_input(observed_hpo_ids: str, output_file_name: Path) -> None:
    """
    Write observed hpo ids to a txt file.

    Any existing file at ``output_file_name`` is overwritten.

    Args:
        observed_hpo_ids (str): Observed hpo ids.
        output_file_name (Path): Output file name.
    """
    # The context manager closes the file on exit, so no explicit close() is needed.
    # An explicit encoding keeps the output independent of the platform's locale.
    with open(output_file_name, "w", encoding="utf-8") as f:
        f.write(observed_hpo_ids)


def write_observed_hpo_ids(phenopacket_path: Path, testdata_dir: Path) -> None:
"""
Write observed hpo ids to a txt file.
Args:
phenopacket_path (Path): Phenopacket path.
testdata_dir (Path): Path to test data directory.
"""
phenopacket = phenopacket_reader(phenopacket_path)
observed_hpo_ids = obtain_observed_hpo_ids(phenopacket)
write_txt_input(
Expand All @@ -27,5 +50,11 @@ def write_observed_hpo_ids(phenopacket_path: Path, testdata_dir: Path) -> None:


def write_input_txt_files(testdata_dir: Path) -> None:
    """
    Write the observed hpo ids txt file for every phenopacket in a corpus.

    Args:
        testdata_dir (Path): Path to test data directory.
    """
    phenopacket_dir = Path(testdata_dir) / "phenopackets"
    for phenopacket_path in all_files(phenopacket_dir):
        write_observed_hpo_ids(phenopacket_path, testdata_dir)
50 changes: 50 additions & 0 deletions src/pheval_ai_marrvel/run/create_apptainer_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@

@dataclass
class ApptainerArguments:
"""
Arguments required for running AI-MARRVEL apptainer commands.
Attributes:
sample_id (str): The sample ID.
vcf_path (Path): The VCF file path.
vcf_assembly (str): The VCF assembly.
hpo_txt_file_path (Path): The hpo txt file path.
data_dependencies (Path): The data_dependencies path.
output_directory (Path): The output directory path.
"""

sample_id: str
vcf_path: Path
vcf_assembly: str
Expand All @@ -19,6 +31,18 @@ class ApptainerArguments:
def get_apptainer_arguments(
phenopacket_path: Path, testdata_dir: Path, input_dir: Path, output_dir: Path
) -> ApptainerArguments:
"""
Get apptainer arguments for running AI-MARRVEL apptainer commands for a phenopacket.
Args:
phenopacket_path (Path): The phenopacket path.
testdata_dir (Path): The testdata directory.
input_dir (Path): The input directory.
output_dir (Path): The output directory.
Returns:
ApptainerArguments: The arguments for running AI-MARRVEL apptainer commands.
"""
phenopacket = phenopacket_reader(phenopacket_path)
vcf_file_data = PhenopacketUtil(phenopacket).vcf_file_data(
phenopacket_path, testdata_dir.joinpath("vcf")
Expand All @@ -34,6 +58,15 @@ def get_apptainer_arguments(


def create_apptainer_command(apptainer_arguments: ApptainerArguments) -> str:
"""
Create an apptainer command for running AI-MARRVEL for a sample.
Args:
apptainer_arguments(ApptainerArguments): Arguments for running AI-MARRVEL with apptainer.
Returns:
str: The string apptainer command.
"""
return (
f"apptainer run --mount type=bind,source={apptainer_arguments.vcf_path},destination=/input/vcf.gz"
f" --mount type=bind,source={apptainer_arguments.hpo_txt_file_path},destination=/input/hpo.txt"
Expand All @@ -45,6 +78,14 @@ def create_apptainer_command(apptainer_arguments: ApptainerArguments) -> str:


def write_commands(commands: List[str], tool_input_commands_dir: Path, testdata_dir: Path) -> None:
"""
Write commands to a txt file.
Args:
commands (List[str]): The commands to write.
tool_input_commands_dir (Path): The tool input commands directory.
testdata_dir (Path): The testdata directory.
"""
joined_commands_str = "\n".join(commands)
with open(
f"{tool_input_commands_dir.joinpath(f'{testdata_dir.name}_commands.txt')}", "w"
Expand All @@ -56,6 +97,15 @@ def write_commands(commands: List[str], tool_input_commands_dir: Path, testdata_
def create_apptainer_commands(
tool_input_commands_dir: Path, testdata_dir: Path, input_dir: Path, output_dir: Path
) -> None:
"""
Create apptainer commands for running AI-MARRVEL apptainer with a corpus.
Args:
tool_input_commands_dir (Path): The tool input commands directory.
testdata_dir (Path): The testdata directory.
input_dir (Path): The input directory.
output_dir (Path): The output directory.
"""
all_commands = []
for phenopacket_path in all_files(testdata_dir.joinpath("phenopackets")):
apptainer_arguments = get_apptainer_arguments(
Expand Down
11 changes: 10 additions & 1 deletion src/pheval_ai_marrvel/run/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,17 @@


def run_commands(
tool_input_commands_dir: Path, testdata_dir: Path, input_dir: Path, output_dir
tool_input_commands_dir: Path, testdata_dir: Path, input_dir: Path, output_dir: Path
) -> None:
"""
Run the apptainer commands.
Args:
tool_input_commands_dir (Path): Path to the tool input commands directory.
testdata_dir (Path): Path to the test data directory.
input_dir (Path): Path to the input directory.
output_dir (Path): Path to the output directory.
"""
create_apptainer_commands(tool_input_commands_dir, testdata_dir, input_dir, output_dir)
batch_file = tool_input_commands_dir.joinpath(f"{testdata_dir.name}_commands.txt")
subprocess.run(
Expand Down
8 changes: 4 additions & 4 deletions src/pheval_ai_marrvel/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@ class AIMARRVELRunner(PhEvalRunner):

def prepare(self):
    """
    Pre-process phenopackets into tool accepted format.

    Calls ``prepare_inputs`` with this runner's test data directory.
    """
    print("creating HPO txt files from phenopackets")
    prepare_inputs(testdata_dir=self.testdata_dir)

def run(self):
"""
Run the tool to produce the raw output.
Run AI-MARRVEL to produce the raw output.
"""
print("running with fake predictor")
print("running with AI-MARRVEL")
run_commands(
tool_input_commands_dir=self.tool_input_commands_dir,
testdata_dir=self.testdata_dir,
Expand Down

0 comments on commit d3186eb

Please sign in to comment.