diff --git a/ppanggolin/formats/proksee_template.json b/ppanggolin/formats/proksee_template.json deleted file mode 100644 index a6952dbd..00000000 --- a/ppanggolin/formats/proksee_template.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "cgview": { - "settings":{ - "format":"circular", - "geneticCode":11, - "backgroundColor":"rgba(255,255,255,1)", - "showShading":true, - "arrowHeadLength":0.3, - "minArcLength":1, - "initialMapThicknessProportion":0.1, - "maxMapThicknessProportion":0.5}, - "backbone":{"color":"rgba(128,128,128,1)", - "colorAlternate":"rgba(200,200,200,1)", - "thickness":5, - "decoration":"arrow" - }, - "ruler": { - "font":"sans-serif,plain,10", - "color":"rgba(0,0,0,1)"}, - "annotation":{"font":"monospace,plain,12", - "onlyDrawFavorites":false, - "visible":true - }, - "dividers":{ - "track": { - "color": "rgba(50,50,50,1)", - "thickness": 1, - "spacing": 1 - }, - "slot":{ - "visible":true, - "color":"rgba(0,0,0,1)", - "thickness":1, - "spacing":1 - } - }, - "highlighter":{ - "visible":true - }, - "captions":[ - { - "position":"bottom-center", - "textAlignment":"center", - "font":"sans-serif,plain,24", - "fontColor":"rgba(0,0,0,1)", - "backgroundColor":"rgba(255,255,255,0.4)" - } - ], - "legend":{ - "position":"top-right", - "textAlignment":"left", - "defaultFont":"sans-serif,plain,14", - "defaultFontColor":"rgba(0,0,0,1)", - "backgroundColor":"rgba(255,255,255,0.75)" - }, - "sequence": { - "color": "rgb(0,0,0)", - "font": "sans-serif,plain,14" - }, - "bookmarks": [ - { - "bbOffset": 86.75, - "bp": 5617, - "favorite": false, - "format": "circular", - "name": "Bookmark-1", - "shortcut": "1", - "zoom": 1.685 - } - ], - "plots": [ - ] - } -} \ No newline at end of file diff --git a/ppanggolin/formats/write_proksee.py b/ppanggolin/formats/write_proksee.py index d87b43f1..5af353e7 100644 --- a/ppanggolin/formats/write_proksee.py +++ b/ppanggolin/formats/write_proksee.py @@ -11,7 +11,9 @@ from tqdm import tqdm from typing import Dict, List, Tuple, Set from uuid import uuid4 -from itertools import cycle +from itertools import cycle +from collections import defaultdict + # installed libraries from bokeh.palettes import Category20 from plotly.express.colors import qualitative @@ -38,11 +40,11 @@ def read_settings(settings_data: dict): settings_data["geneticCode"] = "11" -def write_legend_items(legend_data: dict, features: List[str], modules: Set[Module]): #, sources: List[str]): +def write_legend_items(features: List[str]):#, modules: Set[Module]): #, sources: List[str]): - colors = palette() + # colors = palette() # use https://medialab.github.io/iwanthue/ to find nice colors # that associate well with established partition colors (orange, light green, light blue) main_colors = { @@ -54,29 +56,31 @@ def write_legend_items(legend_data: dict, features: List[str], modules: Set[Modu "dark red": "#ca5c55", } - - legend_data["items"] = [ + legend_data = {"items" : [ {"name": "persistent", "swatchColor": main_colors['orange'], "decoration": "arrow"}, {"name": "shell", "swatchColor": main_colors['light green'], "decoration": "arrow"}, {"name": "cloud", "swatchColor": main_colors['light blue'], "decoration": "arrow"}, - {"name": "RNA", "swatchColor": main_colors['purple'], "decoration": "arrow"},] + {"name": "RNA", "swatchColor": main_colors['purple'], "decoration": "arrow"}, + ] + } if "rgp" in features or "all" in features: legend_data["items"].append({"name": "RGP", "swatchColor": main_colors['dark green'], "decoration": "arc"}), + # if "spots" in features or "all" in features: # legend_data["items"].append({"name": "Spot", "swatchColor": main_colors['dark red'], 1)", "decoration": "arc"}) if "modules" in features or "all" in features: - if modules is None: - legend_data["items"].append({"name": "Module", "swatchColor": main_colors['dark red'], "decoration": "arc"}) - else: - for mod in modules: - legend_data["items"].append({"name": f"module_{mod.ID}", "decoration": "arc", "swatchColor": next(colors)}) - + # if modules is None: + legend_data["items"].append({"name": "Module", "swatchColor": main_colors['dark red'], "decoration": "arc"}) + # else: + # for mod in modules: + # legend_data["items"].append({"name": f"module_{mod.ID}", "decoration": "arc", "swatchColor": next(colors)}) + return legend_data def write_tracks(features: List[str]): tracks = [{"name": "Gene", "separateFeaturesBy": "strand", "position": "outside", "thicknessRatio": 1, - "dataType": "feature", "dataMethod": "source", "dataKeys": "Gene"}, # {"name": "Partition", "separateFeaturesBy": "strand", "position": "outside", "thicknessRatio": 1, "dataType": "feature", "dataMethod": "source", "dataKeys": "partition"} + "dataType": "feature", "dataMethod": "source", "dataKeys": "Gene"} ] if "rgp" in features or "all" in features: @@ -122,8 +126,37 @@ def read_data(template: Path, features: List[str], modules: List[str] = None) -> proksee_data["cgview"]["tracks"] = write_tracks(features) return proksee_data +def initiate_proksee_data(features, org_name): + """ + + """ + + proksee_legends = write_legend_items(features) + + proksee_tracks = write_tracks(features) + + proksee_captions = { + "name": f"{org_name} annotated with PPanGGOLiN", + "position": "bottom-center", + "font": "sans-serif,plain,18", + "backgroundColor": "rgba(255,255,255,0.4)" + } + + cgview_data = {"name": "PPanGGOLiN annotations at genome levels", + "version": "1.5.0", + 'settings':{}, + "legend":proksee_legends, + "tracks":proksee_tracks, + "sequence":{}, + 'captions':[proksee_captions], + } + + return {"cgview":cgview_data} + def write_contig(organism: Organism, genome_sequences): + """ + """ contigs_data_list = [] for contig in tqdm(organism.contigs, unit="contig", disable=True): contig_info = {"name": contig.name, @@ -136,19 +169,15 @@ def write_contig(organism: Organism, genome_sequences): return contigs_data_list -def write_genes(organism: Organism, sources: List[str]=None): +def write_genes(organism: Organism, disable_bar=True): genes_data_list = [] - gf2gene = {} + gf2gene = defaultdict(list) - for gene in tqdm(organism.genes, total=organism.number_of_genes(), unit="genes", disable=True): + for gene in tqdm(organism.genes, total=organism.number_of_genes(), unit="genes", disable=disable_bar): gf = gene.family - if gf.name in gf2gene: - gf2gene[gf.name].append(gene) - else: - gf2gene[gf.name] = [gene] - # annotations = {source: "|".join(list(map(str, gf.get_source(source)))) for source in gf.sources if - # source in sources} + gf2gene[gf.name].append(gene) + genes_data_list.append({"name": gene.name, "type": "Gene", "contig": gene.contig.name, @@ -162,7 +191,7 @@ def write_genes(organism: Organism, sources: List[str]=None): "meta": ""#annotations }) - for gene in tqdm(organism.rna_genes, total=organism.number_of_rnas(), unit="rnas", disable=True): + for gene in tqdm(organism.rna_genes, total=organism.number_of_rnas(), unit="rnas", disable=disable_bar): genes_data_list.append({"name": gene.name, "type": "Gene", @@ -176,16 +205,13 @@ def write_genes(organism: Organism, sources: List[str]=None): "legend": "RNA", "meta": ""#annotations }) - - + return genes_data_list, gf2gene def write_partition(organism: Organism): partition_data_list = [] - c=0 for gene in tqdm(organism.genes, total=organism.number_of_genes(), unit="genes", disable=True): - c += 1 partition_data_list.append({"name": gene.family.name, "presence": gene.family.named_partition, "contig": gene.contig.name, @@ -195,7 +221,6 @@ def write_partition(organism: Organism): "legend": gene.family.named_partition, "tags": ["partition"]}) - return partition_data_list @@ -266,25 +291,23 @@ def write_modules(pangenome: Pangenome, organism: Organism, gf2genes: Dict[str, def write_proksee_organism(pangenome: Pangenome, organism: Organism, output: Path, template: Path, features: List[str] = None, modules: List[str] = None, genome_sequences= None): - proksee_data = read_data(template=template, features=features, modules=None) - - if "name" not in proksee_data["cgview"]["captions"]: - proksee_data["cgview"]["captions"][0]["name"] = f"{organism.name} annotated with PPanGGOLiN" + proksee_data = initiate_proksee_data(features, organism.name) + proksee_data["cgview"]["sequence"]["contigs"] = write_contig(organism, genome_sequences) - if "features" not in proksee_data["cgview"]: - proksee_data["cgview"]["features"] = [] - genes_features, gf2genes = write_genes(organism, sources=None) + genes_features, gf2genes = write_genes(organism) - proksee_data["cgview"]["features"] += genes_features - proksee_data["cgview"]["features"] += write_partition(organism) + proksee_data["cgview"]["features"] = genes_features + # proksee_data["cgview"]["features"] += write_partition(organism) if "rgp" in features or "all" in features: proksee_data["cgview"]["features"] += write_rgp(pangenome=pangenome, organism=organism) + # if "spots" in features or "all" in features: # proksee_data["cgview"]["features"] += write_spots(pangenome=pangenome, organism=organism, gf2genes=gf2genes) + if "modules" in features or "all" in features: proksee_data["cgview"]["features"] += write_modules(pangenome=pangenome, organism=organism, gf2genes=gf2genes) @@ -293,23 +316,23 @@ def write_proksee_organism(pangenome: Pangenome, organism: Organism, output: Pat json.dump(proksee_data, out_json, indent=2) -def write_proksee(pangenome: Pangenome, output: Path, features: List[str] = None, sources: List[str] = None, - template: Path = None, organisms_list: List[str] = None, threads: int = 1, disable_bar: bool = False): - assert features is not None - if template is None: - template = Path(__file__).parent.joinpath("proksee_template").with_suffix(".json") - if organisms_list is not None: - organisms = [organism for organism in pangenome.organisms if organism.name in organisms_list] - else: - organisms = pangenome.organisms - with ThreadPoolExecutor(max_workers=threads) as executor: - with tqdm(total=len(organisms), unit='organism', disable=disable_bar) as progress: - futures = [] - for organism in organisms: - future = executor.submit(write_proksee_organism, pangenome, organism, output, - template, features, sources) - future.add_done_callback(lambda p: progress.update()) - futures.append(future) - - for future in futures: - future.result() +# def write_proksee(pangenome: Pangenome, output: Path, features: List[str] = None, sources: List[str] = None, +# template: Path = None, organisms_list: List[str] = None, threads: int = 1, disable_bar: bool = False): +# assert features is not None +# if template is None: +# template = Path(__file__).parent.joinpath("proksee_template").with_suffix(".json") +# if organisms_list is not None: +# organisms = [organism for organism in pangenome.organisms if organism.name in organisms_list] +# else: +# organisms = pangenome.organisms +# with ThreadPoolExecutor(max_workers=threads) as executor: +# with tqdm(total=len(organisms), unit='organism', disable=disable_bar) as progress: +# futures = [] +# for organism in organisms: +# future = executor.submit(write_proksee_organism, pangenome, organism, output, +# template, features, sources) +# future.add_done_callback(lambda p: progress.update()) +# futures.append(future) + +# for future in futures: +# future.result()