diff --git a/bin/join-metadata-and-clades b/bin/join-metadata-and-clades index f78a1f90..8f28d8b8 100755 --- a/bin/join-metadata-and-clades +++ b/bin/join-metadata-and-clades @@ -93,7 +93,6 @@ def main(): metadata = pd.read_csv(args.metadata, index_col=METADATA_JOIN_COLUMN_NAME, sep='\t', low_memory=False, na_filter = False) - # Read and rename clade column to be more descriptive clades = pd.read_csv(args.nextclade_tsv, index_col=NEXTCLADE_JOIN_COLUMN_NAME, usecols=[NEXTCLADE_JOIN_COLUMN_NAME, *(set(column_map.keys()) - clades_21L_columns)], sep='\t', low_memory=True, dtype="object", na_filter = False) \ @@ -113,7 +112,12 @@ def main(): # Add clade_legacy column as Nextstrain_clade # Use yml mapping with open(args.clade_legacy_mapping, 'r') as legacy_mapping_file: - clade_legacy_mapping = yaml.safe_load(legacy_mapping_file) + clade_legacy_mapping_dict: dict[str, str] = yaml.safe_load(legacy_mapping_file) + + def clade_legacy_mapping(clade_nextstrain: str) -> str: + # Clades absent from the mapping file fall back to "<clade> (Omicron)", the same format as the explicit entries removed from the YAML + return clade_legacy_mapping_dict.get(clade_nextstrain, f"{clade_nextstrain} (Omicron)") + clades["Nextstrain_clade"] = clades["clade_nextstrain"].map(clade_legacy_mapping) # Remove immune_escape and ace2_binding when clade <21L and not recombinant diff --git a/defaults/clade-legacy-mapping.yml b/defaults/clade-legacy-mapping.yml index f43daa4f..12974700 100644 --- a/defaults/clade-legacy-mapping.yml +++ b/defaults/clade-legacy-mapping.yml @@ -20,24 +20,4 @@ 21F: 21F (Iota) 21G: 21G (Lambda) 21H: 21H (Mu) -21K: 21K (Omicron) -21L: 21L (Omicron) -21M: 21M (Omicron) -22A: 22A (Omicron) -22B: 22B (Omicron) -22C: 22C (Omicron) -22D: 22D (Omicron) -22E: 22E (Omicron) -22F: 22F (Omicron) -23A: 23A (Omicron) -23B: 23B (Omicron) -23C: 23C (Omicron) -23D: 23D (Omicron) -23E: 23E (Omicron) -23F: 23F (Omicron) -23G: 23G (Omicron) -23H: 23H (Omicron) -23I: 23I (Omicron) -24A: 24A (Omicron) -24B: 24B (Omicron) recombinant: recombinant