From 86785a5e8ccdf95cd8f84f320c0f6c20553ba3b2 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 1 Jul 2024 19:36:16 +0200 Subject: [PATCH] ignore weird RNA genes when reading pangenome file --- ppanggolin/formats/readBinaries.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ppanggolin/formats/readBinaries.py b/ppanggolin/formats/readBinaries.py index 85c1e7ac..fbfa00aa 100644 --- a/ppanggolin/formats/readBinaries.py +++ b/ppanggolin/formats/readBinaries.py @@ -547,6 +547,13 @@ def read_rnas(pangenome: Pangenome, table: tables.Table, genedata_dict: Dict[int for row in tqdm(read_chunks(table, chunk=chunk_size), total=table.nrows, unit="gene", disable=disable_bar): rna = RNA(row["ID"].decode()) genedata = genedata_dict[row["genedata_id"]] + if genedata.start > genedata.stop: + logging.warning(f"Wrong coordinates in RNA gene {genedata.name}: Start ({genedata.start}) should not be greater than stop ({genedata.stop}). This gene is ignored.") + continue + if genedata.start < 1 or genedata.stop < 1: + logging.warning(f"Wrong coordinates in RNA gene {genedata.name}: Start ({genedata.start}) and stop ({genedata.stop}) should be greater than 0. This gene is ignored.") + continue + rna.fill_annotations(start=genedata.start, stop=genedata.stop, strand=genedata.strand, gene_type=genedata.gene_type, name=genedata.name, product=genedata.product)