Skip to content

Commit

Permalink
Make metadata options optional
Browse files Browse the repository at this point in the history
Preparing for sequence support.
  • Loading branch information
victorlin committed Aug 30, 2024
1 parent 96211b9 commit eb5a01a
Showing 1 changed file with 23 additions and 6 deletions.
29 changes: 23 additions & 6 deletions augur/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,30 +131,47 @@ def register_parser(parent_subparsers):
parser = parent_subparsers.add_parser("merge", help=first_line(__doc__))

input_group = parser.add_argument_group("inputs", "options related to input")
input_group.add_argument("--metadata", nargs="+", action="extend", required=True, metavar="NAME=FILE", help="Required. Metadata table names and file paths. Names are arbitrary monikers used solely for referring to the associated input file in other arguments and in output column names. Paths must be to seekable files, not unseekable streams. Compressed files are supported." + SKIP_AUTO_DEFAULT_IN_HELP)
input_group.add_argument("--metadata", nargs="+", action="extend", metavar="NAME=FILE", help="Required. Metadata table names and file paths. Names are arbitrary monikers used solely for referring to the associated input file in other arguments and in output column names. Paths must be to seekable files, not unseekable streams. Compressed files are supported." + SKIP_AUTO_DEFAULT_IN_HELP)

input_group.add_argument("--metadata-id-columns", default=DEFAULT_ID_COLUMNS, nargs="+", action=ExtendOverwriteDefault, metavar="[TABLE=]COLUMN", help=f"Possible metadata column names containing identifiers, considered in the order given. Columns will be considered for all metadata tables by default. Table-specific column names may be given using the same names assigned in --metadata. Only one ID column will be inferred for each table. (default: {' '.join(map(shquote_humanized, DEFAULT_ID_COLUMNS))})" + SKIP_AUTO_DEFAULT_IN_HELP)
input_group.add_argument("--metadata-delimiters", default=DEFAULT_DELIMITERS, nargs="+", action=ExtendOverwriteDefault, metavar="[TABLE=]CHARACTER", help=f"Possible field delimiters to use for reading metadata tables, considered in the order given. Delimiters will be considered for all metadata tables by default. Table-specific delimiters may be given using the same names assigned in --metadata. Only one delimiter will be inferred for each table. (default: {' '.join(map(shquote_humanized, DEFAULT_DELIMITERS))})" + SKIP_AUTO_DEFAULT_IN_HELP)

output_group = parser.add_argument_group("outputs", "options related to output")
output_group.add_argument('--output-metadata', required=True, metavar="FILE", help="Required. Merged metadata as TSV. Compressed files are supported." + SKIP_AUTO_DEFAULT_IN_HELP)
output_group.add_argument('--output-metadata', metavar="FILE", help="Required. Merged metadata as TSV. Compressed files are supported." + SKIP_AUTO_DEFAULT_IN_HELP)
output_group.add_argument('--quiet', action="store_true", default=False, help="Suppress informational and warning messages normally written to stderr. (default: disabled)" + SKIP_AUTO_DEFAULT_IN_HELP)

return parser


def validate_arguments(args):
    """Reject argument combinations that cannot drive a merge.

    Raises:
        AugurError: if ``args.metadata`` is unset or empty, if
            ``args.output_metadata`` is unset or empty, or if an output is
            requested without its matching input.
    """
    # The input/output checks are written generically; they will make more
    # sense when sequence support is added.
    has_metadata_input = bool(args.metadata)
    if not has_metadata_input:
        raise AugurError("At least one input must be specified.")

    has_metadata_output = bool(args.output_metadata)
    if not has_metadata_output:
        raise AugurError("At least one output must be specified.")

    # With metadata as the only input/output pair, the two checks above
    # already cover this case, so this guard cannot fire yet.
    if has_metadata_output and not has_metadata_input:
        raise AugurError("--output-metadata requires --metadata.")


def run(args):
    """Entry point for the merge command.

    Silences informational output when ``args.quiet`` is set, validates the
    argument combination, then loads the metadata inputs into a fresh
    ``Database`` and writes the merged metadata output.

    Raises whatever ``validate_arguments`` raises for an invalid combination
    of arguments.
    """
    global print_info

    if args.quiet:
        # Rebind the module-level printer to a no-op for the rest of the run.
        print_info = lambda *_: None

    # Catch user errors early to avoid unnecessary computation.
    validate_arguments(args)

    db = Database()

    # Inputs and outputs are guarded separately now that the metadata options
    # are optional at the parser level.
    if args.metadata:
        metadata = get_metadata(args.metadata, args.metadata_id_columns, args.metadata_delimiters)
        output_columns = get_output_columns(metadata)
        load_metadata(db, metadata)

    # validate_arguments() guarantees --metadata was given whenever
    # --output-metadata is, so `metadata` and `output_columns` are bound here.
    if args.output_metadata:
        merge_metadata(db, metadata, output_columns, args.output_metadata)


def get_metadata(
Expand Down

0 comments on commit eb5a01a

Please sign in to comment.