add docstrings

yaseminbridges · May 12, 2024 · d3186eb · d3186eb
1 parent 562ad4d
commit d3186eb
Show file tree

Hide file tree

Showing 8 changed files with 152 additions and 8 deletions.
diff --git a/src/pheval_ai_marrvel/cli.py b/src/pheval_ai_marrvel/cli.py
@@ -5,7 +5,7 @@
 
 @click.group()
 def main():
-    """Exomiser runner."""
+    """AI-MARRVEL runner."""
 
 
 main.add_command(post_process)

diff --git a/src/pheval_ai_marrvel/post_process/post_process.py b/src/pheval_ai_marrvel/post_process/post_process.py
@@ -6,6 +6,13 @@
 
 
 def post_process_results(raw_results_dir: Path, output_dir: Path) -> None:
+    """
+    Post-process AI-MARRVEL raw results and create standardised PhEval TSV results.
+
+    Args:
+        raw_results_dir (Path): Path to the raw results directory.
+        output_dir (Path): Path to the output directory.
+    """
     create_standardised_results(raw_results_dir, output_dir)
 
 
@@ -21,4 +28,11 @@ def post_process_results(raw_results_dir: Path, output_dir: Path) -> None:
     type=Path,
 )
 def post_process(raw_results_dir: Path, output_dir: Path) -> None:
+    """
+    Post-process AI-MARRVEL raw results and create standardised PhEval TSV results.
+
+    Args:
+        raw_results_dir (Path): Path to the raw results directory.
+        output_dir (Path): Path to the output directory.
+    """
     post_process_results(raw_results_dir, output_dir)
diff --git a/src/pheval_ai_marrvel/post_process/post_process_results_format.py b/src/pheval_ai_marrvel/post_process/post_process_results_format.py
@@ -20,7 +20,7 @@ def read_raw_result(raw_result_path: Path) -> pl.DataFrame:
         raw_result_path(Path): Path to the raw result file.
 
     Returns:
-        List[dict]: Contents of the raw result file.
+        pl.DataFrame: Contents of the raw result file.
     """
     raw_result = pl.read_csv(raw_result_path)
     raw_result = raw_result.rename({"Unnamed: 0": "variant"})
@@ -81,18 +81,54 @@ def obtain_gene_identifier(self, result_entry: dict) -> str:
 
     @staticmethod
     def obtain_chrom(variant_str: str) -> str:
+        """
+        Obtain the chromosome from the variant entry.
+
+        Args:
+            variant_str (str): Variant entry.
+
+        Returns:
+            str: The chromosome.
+        """
         return variant_str.split("-")[0]
 
     @staticmethod
     def obtain_pos(variant_str: str) -> int:
+        """
+        Obtain the position from the variant entry.
+
+        Args:
+            variant_str (str): Variant entry.
+
+        Returns:
+            int: The position.
+        """
         return int(variant_str.split("-")[1])
 
     @staticmethod
     def obtain_ref(variant_str: str) -> str:
+        """
+        Obtain the reference allele from the variant entry.
+
+        Args:
+            variant_str (str): Variant entry.
+
+        Returns:
+            str: The reference allele.
+        """
         return variant_str.split("-")[2]
 
     @staticmethod
     def obtain_alt(variant_str: str) -> str:
+        """
+        Obtain the alternate allele from the variant entry.
+
+        Args:
+            variant_str (str): Variant entry.
+
+        Returns:
+            str: The alternate allele.
+        """
         return variant_str.split("-")[3]
 
     def extract_pheval_gene_requirements(self) -> List[PhEvalGeneResult]:
@@ -140,7 +176,7 @@ def extract_pheval_variant_requirements(self) -> List[PhEvalVariantResult]:
 
 def create_standardised_results(raw_results_dir: Path, output_dir: Path) -> None:
     """
-    Create PhEval gene tsv output from raw results.
+    Create PhEval gene and variant tsv output from raw results.
 
     Args:
         raw_results_dir (Path): Path to the raw results directory.

diff --git a/src/pheval_ai_marrvel/prepare/prepare.py b/src/pheval_ai_marrvel/prepare/prepare.py
@@ -4,5 +4,11 @@
 
 
 def prepare_inputs(testdata_dir: Path) -> None:
+    """
+    Prepare input files for AI-MARRVEL prediction from phenopackets.
+
+    Args:
+        testdata_dir (Path): Path to the test data directory.
+    """
     testdata_dir.joinpath("hpo_ids").mkdir(exist_ok=True)
     write_input_txt_files(testdata_dir)
diff --git a/src/pheval_ai_marrvel/prepare/prepare_input.py b/src/pheval_ai_marrvel/prepare/prepare_input.py
@@ -6,19 +6,42 @@
 
 
 def obtain_observed_hpo_ids(phenopacket: Phenopacket) -> str:
+    """
+    Obtain observed hpo ids from a Phenopacket.
+
+    Args:
+        phenopacket (Phenopacket): Phenopacket object.
+
+    Returns:
+        str: Observed hpo ids.
+    """
     observed_phenotypes = PhenopacketUtil(phenopacket).observed_phenotypic_features()
     observed_hpo_ids = [hpo_id.type.id for hpo_id in observed_phenotypes]
     observed_hpo_ids = "\n".join(observed_hpo_ids)
     return observed_hpo_ids
 
 
 def write_txt_input(observed_hpo_ids: str, output_file_name: Path) -> None:
+    """
+    Write observed hpo ids to a txt file.
+
+    Args:
+        observed_hpo_ids (str): Observed hpo ids.
+        output_file_name (Path): Output file name.
+    """
     with open(output_file_name, "w") as f:
         f.write(observed_hpo_ids)
     f.close()
 
 
 def write_observed_hpo_ids(phenopacket_path: Path, testdata_dir: Path) -> None:
+    """
+    Read a phenopacket and write its observed hpo ids to a txt file.
+
+    Args:
+        phenopacket_path (Path): Phenopacket path.
+        testdata_dir (Path): Path to test data directory.
+    """
     phenopacket = phenopacket_reader(phenopacket_path)
     observed_hpo_ids = obtain_observed_hpo_ids(phenopacket)
     write_txt_input(
@@ -27,5 +50,11 @@ def write_observed_hpo_ids(phenopacket_path: Path, testdata_dir: Path) -> None:
 
 
 def write_input_txt_files(testdata_dir: Path) -> None:
+    """
+    Write observed hpo ids to txt files for a corpus.
+
+    Args:
+        testdata_dir (Path): Path to test data directory.
+    """
     for phenopacket_path in all_files(Path(testdata_dir).joinpath("phenopackets")):
         write_observed_hpo_ids(phenopacket_path, testdata_dir)
diff --git a/src/pheval_ai_marrvel/run/create_apptainer_commands.py b/src/pheval_ai_marrvel/run/create_apptainer_commands.py
@@ -8,6 +8,18 @@
 
 @dataclass
 class ApptainerArguments:
+    """
+    Arguments required for running AI-MARRVEL apptainer commands.
+
+    Attributes:
+        sample_id (str): The sample ID.
+        vcf_path (Path): The VCF file path.
+        vcf_assembly (str): The VCF assembly.
+        hpo_txt_file_path (Path): The hpo txt file path.
+        data_dependencies (Path): The data_dependencies path.
+        output_directory (Path): The output directory path.
+    """
+
     sample_id: str
     vcf_path: Path
     vcf_assembly: str
@@ -19,6 +31,18 @@ class ApptainerArguments:
 def get_apptainer_arguments(
     phenopacket_path: Path, testdata_dir: Path, input_dir: Path, output_dir: Path
 ) -> ApptainerArguments:
+    """
+    Get apptainer arguments for running AI-MARRVEL apptainer commands for a phenopacket.
+
+    Args:
+        phenopacket_path (Path): The phenopacket path.
+        testdata_dir (Path): The testdata directory.
+        input_dir (Path): The input directory.
+        output_dir (Path): The output directory.
+
+    Returns:
+        ApptainerArguments: The arguments for running AI-MARRVEL apptainer commands.
+    """
     phenopacket = phenopacket_reader(phenopacket_path)
     vcf_file_data = PhenopacketUtil(phenopacket).vcf_file_data(
         phenopacket_path, testdata_dir.joinpath("vcf")
@@ -34,6 +58,15 @@ def get_apptainer_arguments(
 
 
 def create_apptainer_command(apptainer_arguments: ApptainerArguments) -> str:
+    """
+    Create an apptainer command for running AI-MARRVEL for a sample.
+
+    Args:
+        apptainer_arguments (ApptainerArguments): Arguments for running AI-MARRVEL with apptainer.
+
+    Returns:
+        str: The string apptainer command.
+    """
     return (
         f"apptainer run --mount type=bind,source={apptainer_arguments.vcf_path},destination=/input/vcf.gz"
         f" --mount type=bind,source={apptainer_arguments.hpo_txt_file_path},destination=/input/hpo.txt"
@@ -45,6 +78,14 @@ def create_apptainer_command(apptainer_arguments: ApptainerArguments) -> str:
 
 
 def write_commands(commands: List[str], tool_input_commands_dir: Path, testdata_dir: Path) -> None:
+    """
+    Write commands to a txt file.
+
+    Args:
+        commands (List[str]): The commands to write.
+        tool_input_commands_dir (Path): The tool input commands directory.
+        testdata_dir (Path): The testdata directory.
+    """
     joined_commands_str = "\n".join(commands)
     with open(
         f"{tool_input_commands_dir.joinpath(f'{testdata_dir.name}_commands.txt')}", "w"
@@ -56,6 +97,15 @@ def write_commands(commands: List[str], tool_input_commands_dir: Path, testdata_
 def create_apptainer_commands(
     tool_input_commands_dir: Path, testdata_dir: Path, input_dir: Path, output_dir: Path
 ) -> None:
+    """
+    Create apptainer commands for running AI-MARRVEL apptainer with a corpus.
+
+    Args:
+        tool_input_commands_dir (Path): The tool input commands directory.
+        testdata_dir (Path): The testdata directory.
+        input_dir (Path): The input directory.
+        output_dir (Path): The output directory.
+    """
     all_commands = []
     for phenopacket_path in all_files(testdata_dir.joinpath("phenopackets")):
         apptainer_arguments = get_apptainer_arguments(

diff --git a/src/pheval_ai_marrvel/run/run.py b/src/pheval_ai_marrvel/run/run.py
@@ -5,8 +5,17 @@
 
 
 def run_commands(
-    tool_input_commands_dir: Path, testdata_dir: Path, input_dir: Path, output_dir
+    tool_input_commands_dir: Path, testdata_dir: Path, input_dir: Path, output_dir: Path
 ) -> None:
+    """
+    Run the apptainer commands.
+
+    Args:
+        tool_input_commands_dir (Path): Path to the tool input commands directory.
+        testdata_dir (Path): Path to the test data directory.
+        input_dir (Path): Path to the input directory.
+        output_dir (Path): Path to the output directory.
+    """
     create_apptainer_commands(tool_input_commands_dir, testdata_dir, input_dir, output_dir)
     batch_file = tool_input_commands_dir.joinpath(f"{testdata_dir.name}_commands.txt")
     subprocess.run(

diff --git a/src/pheval_ai_marrvel/runner.py b/src/pheval_ai_marrvel/runner.py
@@ -19,16 +19,16 @@ class AIMARRVELRunner(PhEvalRunner):
 
     def prepare(self):
         """
-        Pre-process any data and inputs necessary to run the tool.
+        Pre-process phenopackets into tool accepted format.
         """
-        print("preparing")
+        print("creating HPO txt files from phenopackets")
         prepare_inputs(testdata_dir=self.testdata_dir)
 
     def run(self):
         """
-        Run the tool to produce the raw output.
+        Run AI-MARRVEL to produce the raw output.
         """
-        print("running with fake predictor")
+        print("running with AI-MARRVEL")
         run_commands(
             tool_input_commands_dir=self.tool_input_commands_dir,
             testdata_dir=self.testdata_dir,