Details
+
+
### Get PMIDs used for OMIM codes from `omim.ttl`
-Command: `make get-pmids`
+Command: `sh run.sh make get-pmids`
### OMIM Code Web Scraper
Currently, the only feature is `get_codes_by_yyyy_mm`, which returns a list of
@@ -86,3 +89,7 @@ from omim2obo.omim_code_scraper import get_codes_by_yyyy_mm
code_tuples = get_codes_by_yyyy_mm('2021/05')
```
+
+
+
+
diff --git a/makefile b/makefile
index 11f3af7..04c3f83 100644
--- a/makefile
+++ b/makefile
@@ -2,7 +2,7 @@
# MAIN COMMANDS / GOALS ------------------------------------------------------------------------------------------------
-all: omim.ttl omim.sssom.tsv omim.owl mondo_genes.csv
+all: omim.ttl omim.sssom.tsv omim.owl mondo-omim-genes.robot.tsv
# build: Create new omim.ttl
omim.ttl:
@@ -35,8 +35,13 @@ omim.owl: omim.ttl mondo_exactmatch_omim.sssom.owl mondo_exactmatch_omimps.sssom
query --update sparql/hgnc_links.ru \
convert -f ofn -o $@
-mondo_genes.csv: omim.owl
- robot query -i omim.owl --query sparql/mondo_genes.sparql $@
+# Create a TSV of relational information for gene and disease classes
+mondo-omim-genes.tsv: omim.owl
+ robot query -i omim.owl --query sparql/mondo-omim-genes.sparql $@
+
+# Create a TSV of relational information for gene and disease classes, as a ROBOT template
+mondo-omim-genes.robot.tsv: mondo-omim-genes.tsv
+ python -m omim2obo.mondo_omim_genes_robot_tsv --inpath $< --outpath $@
cleanup:
@rm -f omim.json
diff --git a/omim2obo/mondo_omim_genes_robot_tsv.py b/omim2obo/mondo_omim_genes_robot_tsv.py
new file mode 100644
index 0000000..9f1af93
--- /dev/null
+++ b/omim2obo/mondo_omim_genes_robot_tsv.py
@@ -0,0 +1,64 @@
+"""Create: ROBOT template of Mondo and OMIM gene relations: relational information for gene and disease classes"""
+from argparse import ArgumentParser
+from pathlib import Path
+from typing import Dict, Union
+
+import pandas as pd
+
+from omim2obo.utils.utils import remove_angle_brackets
+
+
+# Row inserted directly beneath the column header of the output TSV by `mondo_omim_genes_robot_tsv`.
+# Keys are the output column names; values are the template strings written in that row for each column.
+# NOTE(review): per ROBOT template syntax, `ID` should mark the subject term, `SC ... some %` a subclass-of
+# expression filled in with the cell value, `>A ...` an annotation on the axiom created by the column to its left,
+# and an empty string a column that ROBOT ignores - confirm against the ROBOT template documentation.
+ROBOT_SUBHEADER = {
+ 'mondo_id': 'ID',
+ 'hgnc_id': "SC 'has material basis in germline mutation in' some %",
+ 'omim_disease_xref': '>A oboInOwl:source',
+ 'omim_gene': '',
+}
+
+
+def mondo_omim_genes_robot_tsv(inpath: Union[Path, str], outpath: Union[Path, str]) -> pd.DataFrame:
+ """Create: ROBOT template of Mondo and OMIM gene relations"""
+ df = pd.read_csv(inpath, sep='\t')
+
+ # Remove the first character, a question mark (?), from each field in the header; an artefact of the SPARQL query.
+ df.rename(columns={col: col[1:] for col in df.columns if col.startswith('?')}, inplace=True)
+
+ # Remove < and > characters from specified columns
+ uri_cols = ['mondo_id', 'hgnc_id', 'omim_gene']
+ for col in uri_cols:
+ df[col] = remove_angle_brackets(list(df[col]))
+
+ # Format col order
+ df = df[['mondo_id', 'hgnc_id', 'omim_disease_xref', 'omim_gene']]
+
+ # Sort
+ df = df.sort_values(by=['mondo_id', 'hgnc_id', 'omim_gene', 'omim_disease_xref'])
+
+ # Remove cases where >1 gene association
+ # - These indicate non-causal relationships, which we don't care about.
+ df = df[~df['omim_disease_xref'].duplicated(keep=False)]
+
+ # Insert ROBOT subheader
+ df = pd.concat([pd.DataFrame([ROBOT_SUBHEADER]), df])
+
+ df.to_csv(outpath, sep='\t', index=False)
+ return pd.DataFrame()
+
+
+def cli():
+ """Command line interface."""
+ parser = ArgumentParser(
+ prog='mondo-genes-robot-tsv',
+ description='Create a ROBOT template TSV of relational information for gene and disease classes')
+ parser.add_argument(
+ '-i', '--inpath', required=True,
+ help='Path to file with such relational information, but not yet formatted as a ROBOT template.')
+ parser.add_argument(
+ '-o', '--outpath', required=True,
+ help='Path to save output.')
+ d: Dict = vars(parser.parse_args())
+ mondo_omim_genes_robot_tsv(**d)
+
+
+# Run the command line interface only when this module is executed directly, not when it is imported.
+if __name__ == '__main__':
+ cli()
diff --git a/omim2obo/utils/utils.py b/omim2obo/utils/utils.py
new file mode 100644
index 0000000..abd119f
--- /dev/null
+++ b/omim2obo/utils/utils.py
@@ -0,0 +1,16 @@
+"""Misc utilities"""
+from typing import List, Union
+
+
+# todo: also in mondo-ingest. Refactor into mondolib: https://github.com/monarch-initiative/mondolib/issues/13
+def remove_angle_brackets(uris: Union[str, List[str]]) -> Union[str, List[str]]:
+ """Remove angle brackets from URIs, e.g.:
+