Skip to content

Commit

Permalink
Add documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
jpjarnoux committed Sep 15, 2023
1 parent b4768ae commit f4e6648
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 17 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.2.185
1.2.186
49 changes: 38 additions & 11 deletions ppanggolin/formats/readBinaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,21 @@ class Genedata:
"""
This is a general class storing unique gene-related data to be written in a specific
genedata table
:param start: Gene start position
:param stop: Gene stop position
:param strand: Associated strand
:param gene_type: Gene type
:param position: Position of the gene on its contig
:param name: Name of the feature
:param product: Associated product
:param genetic_code: associated genetic code, if any
"""

def __init__(self, start: int, stop: int, strand: str, gene_type: str, position: int, name: str, product: str,
genetic_code: int):
"""Constructor method
:param start: Gene start position
:param stop: Gene stop position
:param strand: Associated strand
:param gene_type: Gene type
:param position: Position of the gene on its contig
:param name: Name of the feature
:param product: Associated product
:param genetic_code: associated genetic code, if any
"""
self.start = start
self.stop = stop
self.strand = strand
Expand All @@ -47,7 +49,7 @@ def __init__(self, start: int, stop: int, strand: str, gene_type: str, position:
self.product = product
self.genetic_code = genetic_code

def __eq__(self, other):
def __eq__(self, other: Genedata):
return self.start == other.start \
and self.stop == other.stop \
and self.strand == other.strand \
Expand Down Expand Up @@ -166,7 +168,9 @@ def read_chunks(table: Table, column: str = None, chunk: int = 10000):
def read_genedata(h5f: tables.File) -> Dict[int, Genedata]:
"""
Reads the genedata table and returns a genedata_id2genedata dictionary
:param h5f: the hdf5 file handler
:return: dictionary linking each genedata identifier to its Genedata object
"""
table = h5f.root.annotations.genedata
Expand Down Expand Up @@ -386,6 +390,13 @@ def read_modules(pangenome: Pangenome, h5f: tables.File, disable_bar: bool = Fal

def read_organisms(pangenome: Pangenome, table: tables.Table, chunk_size: int = 20000,
disable_bar: bool = False):
"""Read organism table in pangenome file to add them to the pangenome object
:param pangenome: Pangenome object
:param table: Organism table
:param chunk_size: Size of the chunks read from the table
:param disable_bar: Disable progress bar
"""
contig2organism = {}
for row in tqdm(read_chunks(table, chunk=chunk_size), total=table.nrows, unit="genome", disable=disable_bar):
organism = Organism(row["name"].decode())
Expand All @@ -394,6 +405,13 @@ def read_organisms(pangenome: Pangenome, table: tables.Table, chunk_size: int =

def read_contigs(pangenome: Pangenome, table: tables.Table, chunk_size: int = 20000,
disable_bar: bool = False):
"""Read contig table in pangenome file to add them to the pangenome object
:param pangenome: Pangenome object
:param table: Contig table
:param chunk_size: Size of the chunks read from the table
:param disable_bar: Disable progress bar
"""
for row in tqdm(read_chunks(table, chunk=chunk_size), total=table.nrows, unit="contig", disable=disable_bar):
contig = Contig(name=row["name"].decode())
contig.is_circular = row["is_circular"]
Expand Down Expand Up @@ -563,7 +581,16 @@ def read_modules_info(h5f: tables.File):
f"\t\t\t- mean: {info_group._v_attrs['StatOfFamiliesInModules']['mean']}")


def read_metadata(pangenome: Pangenome, h5f: tables.File, metatype: str, sources: List[str] = None, disable_bar: bool = False):
def read_metadata(pangenome: Pangenome, h5f: tables.File, metatype: str,
sources: List[str] = None, disable_bar: bool = False):
"""Read metadata to add them to the pangenome object
:param pangenome: Pangenome object
:param h5f: Pangenome file
:param metatype: Object type to associate metadata
:param sources: Source name of metadata
:param disable_bar: Disable progress bar
"""
metadata_group = h5f.root.metadata._f_get_child(metatype)
for source in sources:
source_table = metadata_group._f_get_child(source)
Expand Down
12 changes: 7 additions & 5 deletions ppanggolin/formats/writeAnnotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def write_rnas(pangenome: Pangenome, h5f: tables.File, annotation: tables.Group
return genedata2rna


def genedata_desc(type_len, name_len, product_len):
def genedata_desc(type_len: int, name_len: int, product_len: int) -> Dict[str, Union[tables.UIntCol, tables.StringCol]]:
"""
Creates a table for gene-related data
Expand Down Expand Up @@ -311,8 +311,8 @@ def write_genedata(pangenome: Pangenome, h5f: tables.File, annotation: tables.G
genedata_table.flush()


def write_annotations(pangenome: Pangenome, h5f: tables.File, rec_organisms: bool = True,
rec_contigs: bool = True, rec_genes: bool = True, rec_rnas: bool = True, disable_bar: bool = False):
def write_annotations(pangenome: Pangenome, h5f: tables.File, rec_organisms: bool = True, rec_contigs: bool = True,
rec_genes: bool = True, rec_rnas: bool = True, disable_bar: bool = False):
"""Function writing all the pangenome annotations
:param pangenome: Annotated pangenome
Expand Down Expand Up @@ -361,11 +361,13 @@ def get_gene_sequences_len(pangenome: Pangenome) -> Tuple[int, int]:
return max_gene_id_len, max_gene_type


def gene_sequences_desc(gene_id_len, gene_type_len) -> dict:
def gene_sequences_desc(gene_id_len: int, gene_type_len: int) -> Dict[str, Union[tables.UIntCol, tables.StringCol]]:
"""
Create table to save gene sequences
:param gene_id_len: Maximum size of gene sequence identifier
:param gene_type_len: Maximum size of gene type
:return: Formatted table
"""
return {
Expand All @@ -388,7 +390,7 @@ def get_sequence_len(pangenome: Pangenome) -> int:
return max_seq_len


def sequence_desc(max_seq_len: int) -> dict:
def sequence_desc(max_seq_len: int) -> Dict[str, Union[tables.UIntCol, tables.StringCol]]:
"""
Table description to save sequences
:param max_seq_len: Maximum length of a sequence
Expand Down

0 comments on commit f4e6648

Please sign in to comment.