Skip to content

Commit

Permalink
add source arg to monitor metadata source and add test accordingly
Browse files Browse the repository at this point in the history
  • Loading branch information
JeanMainguy committed Nov 8, 2023
1 parent c31ea5b commit 34d1521
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
23 changes: 18 additions & 5 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,10 @@ jobs:
ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s db1 -m metadata/metadata_genes.tsv -a genes
ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s db2 -m metadata/metadata_genomes.tsv -a genomes
ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s db3 -m metadata/metadata_families.tsv -a families --omit
ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s db4 -m metadata/metadata_rgps.tsv -a RGPs
ppanggolin write_pangenome -p mybasicpangenome/pangenome.h5 --output mybasicpangenome -f --gexf --light_gexf --cpu 1
ppanggolin rgp_cluster --pangenome mybasicpangenome/pangenome.h5 -o rgp_cluster_with_metadata --graph_formats graphml
ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s db4 -m metadata/metadata_rgps.tsv -a RGPs
ppanggolin write_pangenome -p mybasicpangenome/pangenome.h5 --output mybasicpangenome -f --gexf --light_gexf --cpu 1
ppanggolin rgp_cluster --pangenome mybasicpangenome/pangenome.h5 -o rgp_cluster_with_metadata --graph_formats graphml
cd -
- name: testing config file
shell: bash -l {0}
Expand Down Expand Up @@ -151,6 +152,18 @@ jobs:
cd testingDataset
head organisms.gbff.list | cut -f1 > organisms_names.gbff.head.list
ppanggolin write_genomes -p myannopang/pangenome.h5 --output flat_genomes_from_file_org -f --anno organisms.gbff.list --gff --organisms organisms_names.gbff.head.list
ppanggolin write_genomes -p myannopang/pangenome.h5 --output flat_genomes_from_file_org -f \
--anno organisms.gbff.list --gff --organisms organisms_names.gbff.head.list
ppanggolin write_genomes -p stepbystep/pangenome.h5 --output flat_genomes_from_cmdline_orgs --proksee \
--organisms GCF_006508185.1_ASM650818v1_genomic,GCF_002088315.1_ASM208831v1_genomic
head organisms.fasta.list | cut -f1 > organisms_names.fasta.head.list
# Default separator is a pipe but a pipe is found in a value of metadata db1. That is why we use another separator here.
ppanggolin write_genomes -p mybasicpangenome/pangenome.h5 --output mybasicpangenome/genomes_outputs \
--organisms organisms_names.fasta.head.list \
-f --gff --add_metadata --metadata_sep §
ppanggolin write_genomes -p stepbystep/pangenome.h5 --output flat_genomes_from_cmdline_orgs --proksee --organisms GCF_006508185.1_ASM650818v1_genomic,GCF_002088315.1_ASM208831v1_genomic
# The pipe separator is found in metadata source db1. If we don't request this source, then writing with the default pipe separator works fine.
ppanggolin write_genomes -p mybasicpangenome/pangenome.h5 --output mybasicpangenome/genomes_outputs_with_metadata \
-f --gff --add_metadata --metadata_sources db2 db3 db4
14 changes: 10 additions & 4 deletions ppanggolin/formats/writeFlatGenomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,8 @@ def get_organism_list(organisms_filt: str, pangenome: Pangenome) -> Set[Organism

def write_flat_genome_files(pangenome: Pangenome, output: Path,
table: bool = False, gff: bool = False, proksee: bool = False, compress: bool = False,
disable_bar: bool = False, fasta=None, anno=None, organisms_filt: str ="all", add_metadata=False, metadata_sep="|"):
disable_bar: bool = False, fasta=None, anno=None, organisms_filt: str ="all",
add_metadata=False, metadata_sep="|", metadata_sources:List[str]=None):
"""
Main function to write flat files from pangenome
Expand All @@ -422,6 +423,7 @@ def write_flat_genome_files(pangenome: Pangenome, output: Path,
:param anno: File containing the list of GBFF/GFF files for each organism
:param organisms_filt: String used to specify which organisms to write. If all, all organisms are written.
:param metadata_sep: The separator used to join multiple metadata values for element with multiple metadata values from the same source.
:param metadata_sources: Sources of the metadata to use and write in the outputs. None means all sources are used.
"""

Expand All @@ -441,7 +443,7 @@ def write_flat_genome_files(pangenome: Pangenome, output: Path,

check_pangenome_info(pangenome, need_annotations=needAnnotations, need_families=needFamilies,need_graph=need_graph,
need_partitions=needPartitions, need_rgp=needRegions, need_spots=needSpots,
need_modules=needModules, need_metadata=add_metadata,
need_modules=needModules, need_metadata=add_metadata, sources=metadata_sources,
disable_bar=disable_bar)


Expand Down Expand Up @@ -535,7 +537,7 @@ def launch(args: argparse.Namespace):
write_flat_genome_files(pangenome, args.output,
table=args.table, gff=args.gff, proksee=args.proksee,
compress=args.compress, disable_bar=args.disable_prog_bar, fasta=args.fasta, anno=args.anno,
organisms_filt=args.organisms, add_metadata=args.add_metadata, metadata_sep=args.metadata_sep)
organisms_filt=args.organisms, add_metadata=args.add_metadata, metadata_sep=args.metadata_sep, metadata_sources=args.metadata_sources)


def subparser(sub_parser: argparse._SubParsersAction) -> argparse.ArgumentParser:
Expand Down Expand Up @@ -587,7 +589,11 @@ def parser_flat(parser: argparse.ArgumentParser):
required=False,
action="store_true",
help="Include metadata information in the output files if any have been added to pangenome elements (see ppanggolin metadata command).")

optional.add_argument("--metadata_sources",
default=None,
nargs="+",
help="Which source of metadata should be written. By default all metadata sources are included.")

optional.add_argument("--metadata_sep",
required=False,
default='|',
Expand Down

0 comments on commit 34d1521

Please sign in to comment.