From bf33d321b51772d8178a6c6250c506efa46b56b9 Mon Sep 17 00:00:00 2001 From: Lucie COFFION Date: Fri, 22 Nov 2024 17:28:03 +0100 Subject: [PATCH 1/2] Add --use_pseudo option to ppanggolin all. --- ppanggolin/workflow/all.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ppanggolin/workflow/all.py b/ppanggolin/workflow/all.py index 3ce60b7c..9898c127 100644 --- a/ppanggolin/workflow/all.py +++ b/ppanggolin/workflow/all.py @@ -602,6 +602,14 @@ def add_workflow_args(parser: argparse.ArgumentParser): "it will be assigned to its own unique cluster as a singleton.", ) + optional.add_argument( + "--use_pseudo", + required=False, + action="store_true", + help="In the context of provided annotation, use this option to read pseudogenes. " + "(Default behavior is to ignore them)", + ) + optional.add_argument( "-K", "--nb_of_partitions", From f01029569df6df9edcee4480a3ad9164a8cd3086 Mon Sep 17 00:00:00 2001 From: Lucie COFFION Date: Fri, 22 Nov 2024 17:30:47 +0100 Subject: [PATCH 2/2] Add RGP score to the regions output. --- docs/user/RGP/rgpOutputs.md | 1 + ppanggolin/formats/writeFlatPangenome.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/docs/user/RGP/rgpOutputs.md b/docs/user/RGP/rgpOutputs.md index 317230c1..e9a86b33 100644 --- a/docs/user/RGP/rgpOutputs.md +++ b/docs/user/RGP/rgpOutputs.md @@ -25,6 +25,7 @@ The file has the following format : | stop | The stop position of the RGP in the contig. | | length | The length of the RGP in nucleotide | | coordinates | The coordinates of the region. If the region overlap the contig edges will be right with join coordinates syntax (*i.e* 1523..1758,1..57) | +| score | Score of the RGP. | | contigBorder | This is a boolean column. If the RGP is on a contig border it will be True, otherwise, it will be False. This often can indicate that, if an RGP is on a contig border it is probably not complete. | | wholeContig | This is a boolean column. If the RGP is an entire contig, it will be True, and False otherwise. 
If a RGP is an entire contig it can possibly be a plasmid, a region flanked with repeat sequences or a contaminant. | diff --git a/ppanggolin/formats/writeFlatPangenome.py b/ppanggolin/formats/writeFlatPangenome.py index 5ef84f4b..5de33763 100644 --- a/ppanggolin/formats/writeFlatPangenome.py +++ b/ppanggolin/formats/writeFlatPangenome.py @@ -1096,6 +1096,7 @@ def write_rgp_table(regions: Set[Region], output: Path, compress: bool = False): "stop", "length", "coordinates", + "score", "contigBorder", "wholeContig", ] @@ -1117,6 +1118,7 @@ def write_rgp_table(regions: Set[Region], output: Path, compress: bool = False): "stop": region.stop, "length": region.length, "coordinates": region.string_coordinates(), + "score": region.score, "contigBorder": region.is_contig_border, "wholeContig": region.is_whole_contig, }