Skip to content

Commit

Permalink
feat: Format GDC to bdikit
Browse files Browse the repository at this point in the history
  • Loading branch information
roquelopez committed Nov 20, 2024
1 parent b294360 commit bbaf1ba
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
36 changes: 36 additions & 0 deletions scripts/format_gdc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import json
from os.path import join, dirname


# Raw GDC schema dump, expected next to this script.
RAW_GDC_PATH = join(dirname(__file__), "./gdc_schema.json")
# Destination of the reformatted schema, relative to this script.
FORMATTED_GDC_PATH = join(dirname(__file__), "../bdikit/resource/gdc_data.json")


def format_gdc_schema(gdc_schema):
    """Flatten a raw GDC schema into a per-attribute metadata mapping.

    Args:
        gdc_schema: Mapping whose values are dicts carrying a "properties"
            mapping of attribute name -> attribute definition. Each
            definition may provide "description" (attribute description),
            "enum" (list of allowed values) and "enumDef" (mapping of
            value -> dict with an optional "description").

    Returns:
        A dict keyed by attribute name. Each entry holds:
        - "column_description": the attribute description, or "" if absent.
        - "value_data": a mapping of every "enum" value to its "enumDef"
          description, or "" when no description is available.

    Raises:
        KeyError: If an entry of ``gdc_schema`` has no "properties" key.
    """
    metadata = {}

    for attrib_data in gdc_schema.values():
        for attrib_name, attrib_properties in attrib_data["properties"].items():
            value_names = attrib_properties.get("enum", [])
            descriptions = attrib_properties.get("enumDef", {})

            # An attribute name that appears under several schema entries is
            # overwritten, so the last definition encountered wins.
            metadata[attrib_name] = {
                "column_description": attrib_properties.get("description", ""),
                "value_data": {
                    # Values missing from "enumDef" get an empty description.
                    value_name: descriptions.get(value_name, {}).get(
                        "description", ""
                    )
                    for value_name in value_names
                },
            }

    return metadata


def main():
    """Read the raw GDC schema, reformat it, and write the result to disk."""
    # Be explicit about UTF-8 so the result does not depend on the platform's
    # locale encoding.
    with open(RAW_GDC_PATH, encoding="utf-8") as json_file:
        gdc_schema = json.load(json_file)

    metadata = format_gdc_schema(gdc_schema)

    with open(FORMATTED_GDC_PATH, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=4)

    print("GDC schema formatted successfully.")


if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions scripts/gdc_schema.json

Large diffs are not rendered by default.

0 comments on commit bbaf1ba

Please sign in to comment.