Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fixes: synonym_sync_combined_cases.robot.tsv & build hangs #727

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions src/ontology/mondo-ingest.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -594,11 +594,9 @@ tmp/%-synonyms-scope-type-xref.tsv: $(COMPONENTSDIR)/%.owl
../../tests/input/sync_synonym/%-synonyms-scope-type-xref.tsv:
$(ROBOT) query -i ../../tests/input/sync_synonym/test_$*.owl --query ../sparql/synonyms-scope-type-xref.sparql $@

# todo: this output exists for analysis during development; at the end, remove it and its usages
INPUT_FILES := $(wildcard tmp/synonym_sync_combined_cases_*.tsv)
$(SYN_SYNC_DIR)/synonym_sync_combined_cases.robot.tsv: $(foreach n,$(ALL_COMPONENT_IDS), $(SYN_SYNC_DIR)/$(n)-synonyms.added.robot.tsv)
head -n 2 $(firstword $(INPUT_FILES)) > $@
for file in $(INPUT_FILES); do \
$(SYN_SYNC_DIR)/synonym_sync_combined_cases.robot.tsv: $(foreach n,$(ALL_COMPONENT_IDS), $(TMPDIR)/synonym_sync_combined_cases_$(n).tsv)
head -n 2 $< > $@
for file in $^; do \
tail -n +3 $$file >> $@; \
done

Expand All @@ -611,7 +609,7 @@ $(SYN_SYNC_DIR)/sync-synonyms.confirmed.robot.tsv: $(foreach n,$(ALL_COMPONENT_I
$(SYN_SYNC_DIR)/sync-synonyms.updated.robot.tsv: $(foreach n,$(ALL_COMPONENT_IDS), $(SYN_SYNC_DIR)/$(n)-synonyms.updated.robot.tsv)
awk '(NR == 1) || (NR == 2) || (FNR > 2)' $(SYN_SYNC_DIR)/*.synonyms.updated.robot.tsv > $@

$(SYN_SYNC_DIR)/%-synonyms.added.robot.tsv $(SYN_SYNC_DIR)/%-synonyms.confirmed.robot.tsv $(SYN_SYNC_DIR)/%-synonyms.updated.robot.tsv: $(TMPDIR)/mondo.sssom.tsv $(COMPONENTSDIR)/%.db metadata/%.yml tmp/mondo-synonyms-scope-type-xref.tsv tmp/%-synonyms-scope-type-xref.tsv | $(SYN_SYNC_DIR)
$(SYN_SYNC_DIR)/%-synonyms.added.robot.tsv $(SYN_SYNC_DIR)/%-synonyms.confirmed.robot.tsv $(SYN_SYNC_DIR)/%-synonyms.updated.robot.tsv $(TMPDIR)/synonym_sync_combined_cases_%.tsv: $(TMPDIR)/mondo.sssom.tsv $(COMPONENTSDIR)/%.db metadata/%.yml tmp/mondo-synonyms-scope-type-xref.tsv tmp/%-synonyms-scope-type-xref.tsv | $(SYN_SYNC_DIR)
python3 $(SCRIPTSDIR)/sync_synonym.py \
--mondo-mappings-path $(TMPDIR)/mondo.sssom.tsv \
--ontology-db-path $(COMPONENTSDIR)/$*.db \
Expand All @@ -621,7 +619,8 @@ $(SYN_SYNC_DIR)/%-synonyms.added.robot.tsv $(SYN_SYNC_DIR)/%-synonyms.confirmed.
--onto-config-path metadata/$*.yml \
--outpath-added $(SYN_SYNC_DIR)/$*.synonyms.added.robot.tsv \
--outpath-confirmed $(SYN_SYNC_DIR)/$*.synonyms.confirmed.robot.tsv \
--outpath-updated $(SYN_SYNC_DIR)/$*.synonyms.updated.robot.tsv
--outpath-updated $(SYN_SYNC_DIR)/$*.synonyms.updated.robot.tsv \
--outpath-combined $(TMPDIR)/synonym_sync_combined_cases_$*.tsv

##################################
## Externally managed content ####
Expand Down
26 changes: 11 additions & 15 deletions src/scripts/sync_synonym.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from src.scripts.utils import PREFIX_MAP, get_owned_prefix_map


# todo: when combined_cases_df no longer necessary, remove 'case'
HEADERS_TO_ROBOT_SUBHEADERS = {
'mondo_id': 'ID',
'mondo_label': '',
Expand Down Expand Up @@ -204,17 +203,14 @@ def sync_synonyms(
ontology_db_path: Union[Path, str], mondo_synonyms_path: Union[Path, str],
mondo_exclusion_configs: Union[Path, str], onto_synonym_types_path: Union[Path, str],
mondo_mappings_path: Union[Path, str], onto_config_path: Union[Path, str], outpath_added: Union[Path, str],
outpath_confirmed: Union[Path, str], outpath_updated: Union[Path, str], outpath_deleted: Union[Path, str] = None,
combined_outpath_template_str='tmp/synonym_sync_combined_cases_{}.tsv'
outpath_confirmed: Union[Path, str], outpath_updated: Union[Path, str],
outpath_combined: Union[Path, str], outpath_deleted: Union[Path, str] = None,
):
"""Create outputs for syncing synonyms between Mondo and its sources.

todo: update when -deleted is reactivated
:param outpath_deleted: Optional. This case isn't fully fleshed out yet.

todo: if we decided that this param should stay, set as required CLI/functional param w/ no default value.
:param combined_outpath_template_str: Creates an additional file concatenating all case files.

todo: possible refactor: labels: Maybe could be done more cleanly and consistently. At first, wanted to add to both
source_df and mondo_df, but this caused _x and _y cols during joins, or I would have to join on those cols as well.
So I arbitrarily chose mondo_df. This is fine in all cases but -added, where they're added in a custom way.
Expand Down Expand Up @@ -387,15 +383,12 @@ def sync_synonyms(
deleted_df['case'] = 'deleted'

# Write outputs
# todo: temp: combined_cases_df: combine all cases for analysis during development
if combined_outpath_template_str:
combined_cases_df = pd.concat([confirmed_df, added_df, updated_df, deleted_df], ignore_index=True)\
.fillna('')
combined_cases_outpath = str(combined_outpath_template_str).format(source_name)
combined_cases_df = _common_operations(combined_cases_df, combined_cases_outpath, df_is_combined=True)
combined_cases_df['source'] = source_name
combined_cases_df = pd.concat([pd.DataFrame([HEADERS_TO_ROBOT_SUBHEADERS]), combined_cases_df])
combined_cases_df.to_csv(combined_cases_outpath, sep='\t', index=False)
combined_cases_df = pd.concat([confirmed_df, added_df, updated_df, deleted_df], ignore_index=True)\
.fillna('')
combined_cases_df = _common_operations(combined_cases_df, outpath_combined, df_is_combined=True)
combined_cases_df['source'] = source_name
combined_cases_df = pd.concat([pd.DataFrame([HEADERS_TO_ROBOT_SUBHEADERS]), combined_cases_df])
combined_cases_df.to_csv(outpath_combined, sep='\t', index=False)


def cli():
Expand Down Expand Up @@ -441,6 +434,9 @@ def cli():
'-u', '--outpath-updated', required=True,
help='Path to ROBOT template TSV to create which will contain updates to synonym scope predicate; cases where '
'the synonym exists in Mondo and on the mapped source term, but the scope predicate is different.')
parser.add_argument(
'-b', '--outpath-combined', required=True,
help='Path to curation file which is a concatenation of all cases.')
sync_synonyms(**vars(parser.parse_args()))


Expand Down