Skip to content

Commit

Permalink
add overlap correction when parsing external fasta along GFF files
Browse files Browse the repository at this point in the history
  • Loading branch information
JeanMainguy committed Nov 18, 2024
1 parent c1a961b commit 7ee9bd4
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions ppanggolin/annotate/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1034,12 +1034,10 @@ def check_chevrons_in_start_and_stop(
dbxref_metadata
)

if fields_gff[gff_seqname] in circular_contigs or (
if contig is not None and (
"IS_CIRCULAR" in attributes
and attributes["IS_CIRCULAR"] == "true"
):
# WARNING: In case we have prodigal gff with is_circular attributes.
# This would fail as contig is not defined. However, is_circular should not be found in prodigal gff
logging.getLogger("PPanGGOLiN").debug(
f"Contig {contig.name} is circular."
)
Expand Down Expand Up @@ -1201,6 +1199,7 @@ def check_chevrons_in_start_and_stop(
contig_sequences = get_contigs_from_fasta_file(org, fasta_string.split("\n"))

correct_putative_overlaps(org.contigs)

for contig in org.contigs:

for gene in contig.genes:
Expand Down Expand Up @@ -1611,6 +1610,14 @@ def get_gene_sequences_from_fastas(
with read_compressed_or_not(Path(elements[1])) as currFastaFile:
fasta_dict[org] = get_contigs_from_fasta_file(org, currFastaFile)

# When dealing with GFF files, some genes may have coordinates extending beyond the actual
# length of their contig, typically when they overlap the contig edges. Such genes usually
# need to be split into two parts to handle wrapping around the circular sequence.
# If the GFF file lacks associated FASTA sequences and the contig length could not be
# determined from the GFF file itself, this correction must be applied while parsing the
# external FASTA file.

correct_putative_overlaps(org.contigs)

if set(pangenome.organisms) > set(fasta_dict.keys()):
missing = pangenome.number_of_organisms - len(
set(pangenome.organisms) & set(fasta_dict.keys())
Expand Down

0 comments on commit 7ee9bd4

Please sign in to comment.