Skip to content

Commit

Permalink
generates IRIs for labels not found in crosswalk (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
vickydaiya authored Oct 25, 2023
1 parent d8d0453 commit a3533a9
Showing 1 changed file with 18 additions and 0 deletions.
18 changes: 18 additions & 0 deletions containers/crosswalking/context/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import anndata
import pandas as pd
from pathlib import Path
import re


def filter_crosswalk_table(
Expand All @@ -21,6 +22,14 @@ def filter_crosswalk_table(
return crosswalk_table[COLUMNS].drop_duplicates()


def generate_iri(label: str) -> str:
    """Generate a temporary IRI for a label not found in the crosswalk tables.

    The label is lower-cased and stripped of surrounding whitespace, every
    run of non-word characters is collapsed into a single hyphen, and any
    character other than ``a-z``, ``0-9`` or ``-`` is then removed.

    Args:
        label: The annotation label to turn into an IRI suffix.

    Returns:
        The string ``"ASCTB-TEMP:"`` followed by the normalized label.
    """
    suffix = label.lower().strip()
    # Python's re takes the bare pattern; the JavaScript-style "/.../g"
    # delimiters would be matched literally and the substitution would
    # never fire, silently dropping word separators instead of hyphenating.
    suffix = re.sub(r"\W+", "-", suffix)
    # Drop whatever is still outside the allowed set (e.g. underscores,
    # non-ASCII letters, which \W does not match).
    suffix = re.sub(r"[^a-z0-9-]+", "", suffix)
    return "ASCTB-TEMP:" + suffix


def crosswalk(
matrix: anndata.AnnData,
annotation_column: str,
Expand All @@ -42,6 +51,15 @@ def crosswalk(
right_on=crosswalk_table_label_column,
how="left",
).drop(crosswalk_table_label_column, axis=1)
merged_obs.loc[
merged_obs[crosswalk_table_clid_column].isna(), crosswalk_table_clid_column
] = merged_obs.apply(
lambda row: generate_iri(row[annotation_column]),
axis=1,
)
merged_obs.loc[
merged_obs[crosswalk_table_match_column].isna(), crosswalk_table_match_column
] = "skos:exactMatch"
merged_obs.index = matrix.obs.index
matrix.obs = merged_obs
return matrix
Expand Down

0 comments on commit a3533a9

Please sign in to comment.