-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #113 from monarch-initiative/hgnc-template
HGNC robot template
- Loading branch information
Showing
8 changed files
with
191 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,4 +40,4 @@ jobs: | |
files: | | ||
omim.owl | ||
omim.sssom.tsv | ||
mondo_genes.csv | ||
mondo-omim-genes.robot.tsv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
"""Create: ROBOT template of Mondo and OMIM gene relations: relational information for gene and disease classes""" | ||
from argparse import ArgumentParser | ||
from pathlib import Path | ||
from typing import Dict, Union | ||
|
||
import pandas as pd | ||
|
||
from omim2obo.utils.utils import remove_angle_brackets | ||
|
||
|
||
# Row inserted directly beneath the column names of the output TSV (see the "Insert ROBOT subheader" step in
# mondo_omim_genes_robot_tsv). Keys are the output column names; values are the ROBOT template strings for them.
ROBOT_SUBHEADER = {
    'mondo_id': 'ID',
    'hgnc_id': "SC 'has material basis in germline mutation in' some %",
    'omim_disease_xref': '>A oboInOwl:source',
    # NOTE(review): empty template string; presumably ROBOT then ignores this column -- confirm against ROBOT docs.
    'omim_gene': '',
}
|
||
|
||
def mondo_omim_genes_robot_tsv(inpath: Union[Path, str], outpath: Union[Path, str]) -> pd.DataFrame:
    """Create: ROBOT template of Mondo and OMIM gene relations

    Reads the tab-separated file at `inpath`, normalises it, prepends the ROBOT subheader row and writes the result
    to `outpath` as a TSV.

    :param inpath: Path to a TSV with the columns `mondo_id`, `hgnc_id`, `omim_disease_xref` and `omim_gene`
      (each optionally prefixed with `?`).
    :param outpath: Path the ROBOT template TSV is written to.
    :return: The DataFrame that was written to `outpath`, subheader row included.
    :raises KeyError: If one of the expected columns is missing from the input.
    """
    df = pd.read_csv(inpath, sep='\t')

    # Remove the first character, a question mark (?), from each field in the header; an artefact of the SPARQL query.
    df = df.rename(columns={col: col[1:] for col in df.columns if col.startswith('?')})

    # Remove < and > characters from specified columns
    uri_cols = ['mondo_id', 'hgnc_id', 'omim_gene']
    for col in uri_cols:
        df[col] = remove_angle_brackets(list(df[col]))

    # Format col order
    df = df[['mondo_id', 'hgnc_id', 'omim_disease_xref', 'omim_gene']]

    # Sort
    df = df.sort_values(by=['mondo_id', 'hgnc_id', 'omim_gene', 'omim_disease_xref'])

    # Remove cases where >1 gene association
    #  - These indicate non-causal relationships, which we don't care about.
    #  - keep=False drops every row whose omim_disease_xref occurs more than once, not just the repeats.
    df = df[~df['omim_disease_xref'].duplicated(keep=False)]

    # Insert ROBOT subheader
    #  - ignore_index gives the returned frame a clean 0..n-1 index; the file is written without the index anyway.
    df = pd.concat([pd.DataFrame([ROBOT_SUBHEADER]), df], ignore_index=True)

    df.to_csv(outpath, sep='\t', index=False)
    # Return what was written, rather than an empty placeholder frame, so callers can use the result.
    return df
|
||
|
||
def cli():
    """Parse the command line arguments and build the ROBOT template TSV from them."""
    arg_parser = ArgumentParser(
        prog='mondo-genes-robot-tsv',
        description='Create a ROBOT template TSV of relational information for gene and disease classes')

    # Both options are mandatory; each entry is (flags, help text).
    required_options = (
        (('-i', '--inpath'),
         'Path to file with such relational information, but not yet formatted as a ROBOT template.'),
        (('-o', '--outpath'),
         'Path to save output.'),
    )
    for flags, help_text in required_options:
        arg_parser.add_argument(*flags, required=True, help=help_text)

    parsed = arg_parser.parse_args()
    mondo_omim_genes_robot_tsv(inpath=parsed.inpath, outpath=parsed.outpath)
|
||
|
||
if __name__ == '__main__': | ||
cli() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
"""Misc utilities""" | ||
from typing import List, Union | ||
|
||
|
||
# todo: also in mondo-ingest. Refactor into mondolib: https://github.com/monarch-initiative/mondolib/issues/13 | ||
def remove_angle_brackets(uris: Union[str, List[str]]) -> Union[str, List[str]]:
    """Strip one leading '<' and one trailing '>' from a URI, or from each URI in a list, e.g.:
    <https://omim.org/entry/100050> --> https://omim.org/entry/100050

    A single string in gives a single string back; any other iterable of strings gives a new list back."""

    def _strip_one(uri: str) -> str:
        # At most one bracket is removed from each end; the two ends are handled independently.
        if uri.startswith('<'):
            uri = uri[1:]
        if uri.endswith('>'):
            uri = uri[:-1]
        return uri

    if isinstance(uris, str):
        return _strip_one(uris)
    return [_strip_one(uri) for uri in uris]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
#!/bin/sh
# Wrapper script for docker.
#
# This is used primarily for wrapping the GNU Make workflow.
# Instead of typing "make TARGET", type "./run.sh make TARGET".
# This will run the make workflow within a docker container.
#
# The assumption is that you are working in the src/ontology folder;
# we therefore map the whole repo (../..) to a docker volume.
#
# To use singularity instead of docker, please issue
# export USE_SINGULARITY=<any-value>
# before running this script.
#
# See README-editors.md for more details.

# Optional local overrides: any variable used below may be preset in run.sh.conf.
if [ -f run.sh.conf ]; then
    . ./run.sh.conf
fi

# Look for a GitHub token
# An already-set GH_TOKEN wins; otherwise the first token file found is used.
if [ -n "$GH_TOKEN" ]; then
    :
elif [ -f ../../.github/token.txt ]; then
    GH_TOKEN=$(cat ../../.github/token.txt)
elif [ -f "$XDG_CONFIG_HOME/ontology-development-kit/github/token" ]; then
    # Quoted so that a config directory containing spaces does not break the test.
    GH_TOKEN=$(cat "$XDG_CONFIG_HOME/ontology-development-kit/github/token")
elif [ -f "$HOME/Library/Application Support/ontology-development-kit/github/token" ]; then
    GH_TOKEN=$(cat "$HOME/Library/Application Support/ontology-development-kit/github/token")
fi

ODK_IMAGE=${ODK_IMAGE:-odkfull}
TAG_IN_IMAGE=$(echo $ODK_IMAGE | awk -F':' '{ print $2 }')
if [ -n "$TAG_IN_IMAGE" ]; then
    # Override ODK_TAG env var if IMAGE already includes a tag
    ODK_TAG=$TAG_IN_IMAGE
    ODK_IMAGE=$(echo $ODK_IMAGE | awk -F':' '{ print $1 }')
fi
ODK_TAG=${ODK_TAG:-v1.4.3}
ODK_JAVA_OPTS=${ODK_JAVA_OPTS:--Xmx20G}
ODK_DEBUG=${ODK_DEBUG:-no}

# Convert OWLAPI_* environment variables to the OWLAPI as Java options
# See http://owlcs.github.io/owlapi/apidocs_4/org/semanticweb/owlapi/model/parameters/ConfigurationOptions.html
# for a list of allowed options
OWLAPI_OPTIONS_NAMESPACE=org.semanticweb.owlapi.model.parameters.ConfigurationOptions
for owlapi_var in $(env | sed -n s/^OWLAPI_//p) ; do
    # ${owlapi_var%=*} is the option name, ${owlapi_var#*=} its value.
    ODK_JAVA_OPTS="$ODK_JAVA_OPTS -D$OWLAPI_OPTIONS_NAMESPACE.${owlapi_var%=*}=${owlapi_var#*=}"
done

TIMECMD=
if [ x$ODK_DEBUG = xyes ]; then
    # If you wish to change the format string, take care of using
    # non-breaking spaces (U+00A0) instead of normal spaces, to
    # prevent the shell from tokenizing the format string.
    echo "Running ${ODK_IMAGE} with ${ODK_JAVA_OPTS} of memory for ROBOT and Java-based pipeline steps."
    TIMECMD="/usr/bin/time -f ### DEBUG STATS ###\nElapsed time: %E\nPeak memory: %M kb"
fi

VOLUME_BIND=$PWD:/work
WORK_DIR=/work

# ODK_BINDS holds extra bindings as a comma-separated list of src:dst pairs.
if [ -n "$ODK_BINDS" ]; then
    VOLUME_BIND="$VOLUME_BIND,$ODK_BINDS"
fi

if [ -n "$USE_SINGULARITY" ]; then

    singularity exec --cleanenv $ODK_SINGULARITY_OPTIONS \
        --env "ROBOT_JAVA_ARGS=$ODK_JAVA_OPTS,JAVA_OPTS=$ODK_JAVA_OPTS" \
        --bind $VOLUME_BIND \
        -W $WORK_DIR \
        docker://obolibrary/$ODK_IMAGE:$ODK_TAG $TIMECMD "$@"
else
    # Docker takes one -v per binding, so every comma (hence the g flag) becomes " -v ".
    BIND_OPTIONS="-v $(echo $VOLUME_BIND | sed 's/,/ -v /g')"
    docker run $ODK_DOCKER_OPTIONS $BIND_OPTIONS -w $WORK_DIR \
        -e ROBOT_JAVA_ARGS="$ODK_JAVA_OPTS" -e JAVA_OPTS="$ODK_JAVA_OPTS" \
        --rm -ti obolibrary/$ODK_IMAGE:$ODK_TAG $TIMECMD "$@"
fi

case "$@" in
*update_repo*|*release*)
    echo "Please remember to update your ODK image from time to time: https://oboacademy.github.io/obook/howto/odk-update/."
    ;;
esac
File renamed without changes.