Skip to content

Commit

Permalink
Parse new RKI JSON blob for pango_lineage
Browse files (browse the repository at this point in the history)
This currently takes the first lineage value from the JSON blob.
We may need to revisit this and loop over the entries to extract the latest lineage value.
  • Loading branch information
j23414 committed Sep 12, 2024
1 parent 2df0af9 commit 7767c76
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions lib/utils/transformpipeline/transforms.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import csv
import re
import unicodedata
import json
from collections import defaultdict
from typing import Any, Collection, List, MutableMapping, Sequence, Tuple , Dict , Union
import pandas as pd
Expand Down Expand Up @@ -287,6 +288,8 @@ def __init__(self):
def transform_value(self, entry: dict) -> dict:
entry['sequence'] = entry['sequence'].replace('\n', '')
entry['length'] = len(entry['sequence'])
lineage_dict = json.loads(entry['pango_lineage'])
entry['pango_lineage'] = lineage_dict[0]['lineage']

# Normalize all string data to Unicode Normalization Form C, for
# consistent, predictable string comparisons.
Expand Down

0 comments on commit 7767c76

Please sign in to comment.