Skip to content

Commit

Permalink
remove the need of a proksee template
Browse files Browse the repository at this point in the history
  • Loading branch information
JeanMainguy committed Oct 17, 2023
1 parent 0b6f6f4 commit 32bd30a
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 131 deletions.
74 changes: 0 additions & 74 deletions ppanggolin/formats/proksee_template.json

This file was deleted.

137 changes: 80 additions & 57 deletions ppanggolin/formats/write_proksee.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
from tqdm import tqdm
from typing import Dict, List, Tuple, Set
from uuid import uuid4
from itertools import cycle
from itertools import cycle
from collections import defaultdict

# installed libraries
from bokeh.palettes import Category20
from plotly.express.colors import qualitative
Expand All @@ -38,11 +40,11 @@ def read_settings(settings_data: dict):
settings_data["geneticCode"] = "11"


def write_legend_items(legend_data: dict, features: List[str], modules: Set[Module]): #, sources: List[str]):
def write_legend_items(features: List[str]):#, modules: Set[Module]): #, sources: List[str]):



colors = palette()
# colors = palette()
# use https://medialab.github.io/iwanthue/ to find nice colors
# that associate well with established partition colors (orange, light green, light blue)
main_colors = {
Expand All @@ -54,29 +56,31 @@ def write_legend_items(legend_data: dict, features: List[str], modules: Set[Modu
"dark red": "#ca5c55",
}


legend_data["items"] = [
legend_data = {"items" : [
{"name": "persistent", "swatchColor": main_colors['orange'], "decoration": "arrow"},
{"name": "shell", "swatchColor": main_colors['light green'], "decoration": "arrow"},
{"name": "cloud", "swatchColor": main_colors['light blue'], "decoration": "arrow"},
{"name": "RNA", "swatchColor": main_colors['purple'], "decoration": "arrow"},]
{"name": "RNA", "swatchColor": main_colors['purple'], "decoration": "arrow"},
]
}
if "rgp" in features or "all" in features:
legend_data["items"].append({"name": "RGP", "swatchColor": main_colors['dark green'], "decoration": "arc"}),

# if "spots" in features or "all" in features:
# legend_data["items"].append({"name": "Spot", "swatchColor": main_colors['dark red'], 1)", "decoration": "arc"})

if "modules" in features or "all" in features:
if modules is None:
legend_data["items"].append({"name": "Module", "swatchColor": main_colors['dark red'], "decoration": "arc"})
else:
for mod in modules:
legend_data["items"].append({"name": f"module_{mod.ID}", "decoration": "arc", "swatchColor": next(colors)})

# if modules is None:
legend_data["items"].append({"name": "Module", "swatchColor": main_colors['dark red'], "decoration": "arc"})
# else:
# for mod in modules:
# legend_data["items"].append({"name": f"module_{mod.ID}", "decoration": "arc", "swatchColor": next(colors)})
return legend_data

def write_tracks(features: List[str]):

tracks = [{"name": "Gene", "separateFeaturesBy": "strand", "position": "outside", "thicknessRatio": 1,
"dataType": "feature", "dataMethod": "source", "dataKeys": "Gene"}, # {"name": "Partition", "separateFeaturesBy": "strand", "position": "outside", "thicknessRatio": 1, "dataType": "feature", "dataMethod": "source", "dataKeys": "partition"}
"dataType": "feature", "dataMethod": "source", "dataKeys": "Gene"}
]

if "rgp" in features or "all" in features:
Expand Down Expand Up @@ -122,8 +126,37 @@ def read_data(template: Path, features: List[str], modules: List[str] = None) ->
proksee_data["cgview"]["tracks"] = write_tracks(features)
return proksee_data

def initiate_proksee_data(features, org_name):
"""
"""

proksee_legends = write_legend_items(features)

proksee_tracks = write_tracks(features)

proksee_captions = {
"name": f"{org_name} annotated with PPanGGOLiN",
"position": "bottom-center",
"font": "sans-serif,plain,18",
"backgroundColor": "rgba(255,255,255,0.4)"
}

cgview_data = {"name": "PPanGGOLiN annotations at genome levels",
"version": "1.5.0",
'settings':{},
"legend":proksee_legends,
"tracks":proksee_tracks,
"sequence":{},
'captions':[proksee_captions],
}

return {"cgview":cgview_data}


def write_contig(organism: Organism, genome_sequences):
"""
"""
contigs_data_list = []
for contig in tqdm(organism.contigs, unit="contig", disable=True):
contig_info = {"name": contig.name,
Expand All @@ -136,19 +169,15 @@ def write_contig(organism: Organism, genome_sequences):
return contigs_data_list


def write_genes(organism: Organism, sources: List[str]=None):
def write_genes(organism: Organism, disable_bar=True):
genes_data_list = []
gf2gene = {}
gf2gene = defaultdict(list)

for gene in tqdm(organism.genes, total=organism.number_of_genes(), unit="genes", disable=True):
for gene in tqdm(organism.genes, total=organism.number_of_genes(), unit="genes", disable=disable_bar):

gf = gene.family
if gf.name in gf2gene:
gf2gene[gf.name].append(gene)
else:
gf2gene[gf.name] = [gene]
# annotations = {source: "|".join(list(map(str, gf.get_source(source)))) for source in gf.sources if
# source in sources}
gf2gene[gf.name].append(gene)

genes_data_list.append({"name": gene.name,
"type": "Gene",
"contig": gene.contig.name,
Expand All @@ -162,7 +191,7 @@ def write_genes(organism: Organism, sources: List[str]=None):
"meta": ""#annotations
})

for gene in tqdm(organism.rna_genes, total=organism.number_of_rnas(), unit="rnas", disable=True):
for gene in tqdm(organism.rna_genes, total=organism.number_of_rnas(), unit="rnas", disable=disable_bar):

genes_data_list.append({"name": gene.name,
"type": "Gene",
Expand All @@ -176,16 +205,13 @@ def write_genes(organism: Organism, sources: List[str]=None):
"legend": "RNA",
"meta": ""#annotations
})



return genes_data_list, gf2gene


def write_partition(organism: Organism):
partition_data_list = []
c=0
for gene in tqdm(organism.genes, total=organism.number_of_genes(), unit="genes", disable=True):
c += 1
partition_data_list.append({"name": gene.family.name,
"presence": gene.family.named_partition,
"contig": gene.contig.name,
Expand All @@ -195,7 +221,6 @@ def write_partition(organism: Organism):
"legend": gene.family.named_partition,
"tags": ["partition"]})


return partition_data_list


Expand Down Expand Up @@ -266,25 +291,23 @@ def write_modules(pangenome: Pangenome, organism: Organism, gf2genes: Dict[str,
def write_proksee_organism(pangenome: Pangenome, organism: Organism, output: Path, template: Path,
features: List[str] = None, modules: List[str] = None, genome_sequences= None):

proksee_data = read_data(template=template, features=features, modules=None)

if "name" not in proksee_data["cgview"]["captions"]:
proksee_data["cgview"]["captions"][0]["name"] = f"{organism.name} annotated with PPanGGOLiN"
proksee_data = initiate_proksee_data(features, organism.name)


proksee_data["cgview"]["sequence"]["contigs"] = write_contig(organism, genome_sequences)

if "features" not in proksee_data["cgview"]:
proksee_data["cgview"]["features"] = []

genes_features, gf2genes = write_genes(organism, sources=None)
genes_features, gf2genes = write_genes(organism)

proksee_data["cgview"]["features"] += genes_features
proksee_data["cgview"]["features"] += write_partition(organism)
proksee_data["cgview"]["features"] = genes_features
# proksee_data["cgview"]["features"] += write_partition(organism)

if "rgp" in features or "all" in features:
proksee_data["cgview"]["features"] += write_rgp(pangenome=pangenome, organism=organism)

# if "spots" in features or "all" in features:
# proksee_data["cgview"]["features"] += write_spots(pangenome=pangenome, organism=organism, gf2genes=gf2genes)

if "modules" in features or "all" in features:
proksee_data["cgview"]["features"] += write_modules(pangenome=pangenome, organism=organism, gf2genes=gf2genes)

Expand All @@ -293,23 +316,23 @@ def write_proksee_organism(pangenome: Pangenome, organism: Organism, output: Pat
json.dump(proksee_data, out_json, indent=2)


def write_proksee(pangenome: Pangenome, output: Path, features: List[str] = None, sources: List[str] = None,
template: Path = None, organisms_list: List[str] = None, threads: int = 1, disable_bar: bool = False):
assert features is not None
if template is None:
template = Path(__file__).parent.joinpath("proksee_template").with_suffix(".json")
if organisms_list is not None:
organisms = [organism for organism in pangenome.organisms if organism.name in organisms_list]
else:
organisms = pangenome.organisms
with ThreadPoolExecutor(max_workers=threads) as executor:
with tqdm(total=len(organisms), unit='organism', disable=disable_bar) as progress:
futures = []
for organism in organisms:
future = executor.submit(write_proksee_organism, pangenome, organism, output,
template, features, sources)
future.add_done_callback(lambda p: progress.update())
futures.append(future)

for future in futures:
future.result()
# def write_proksee(pangenome: Pangenome, output: Path, features: List[str] = None, sources: List[str] = None,
# template: Path = None, organisms_list: List[str] = None, threads: int = 1, disable_bar: bool = False):
# assert features is not None
# if template is None:
# template = Path(__file__).parent.joinpath("proksee_template").with_suffix(".json")
# if organisms_list is not None:
# organisms = [organism for organism in pangenome.organisms if organism.name in organisms_list]
# else:
# organisms = pangenome.organisms
# with ThreadPoolExecutor(max_workers=threads) as executor:
# with tqdm(total=len(organisms), unit='organism', disable=disable_bar) as progress:
# futures = []
# for organism in organisms:
# future = executor.submit(write_proksee_organism, pangenome, organism, output,
# template, features, sources)
# future.add_done_callback(lambda p: progress.update())
# futures.append(future)

# for future in futures:
# future.result()

0 comments on commit 32bd30a

Please sign in to comment.