Skip to content

Commit

Permalink
Bug fixes: Synonym Sync - combined cases
Browse files Browse the repository at this point in the history
- Bug fix: synonym_sync_combined_cases.robot.tsv would sometimes be empty.
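- Bug fix: Sometimes builds would pause in a particular place until some input was made in the terminal. Turned out to be an artefact of the same underlying problem: when the `$(wildcard tmp/synonym_sync_combined_cases_*.tsv)` list expanded to nothing, `head -n 2` was invoked without a file argument and waited on standard input, and the loop that appends the remaining rows had nothing to iterate over.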
  • Loading branch information
joeflack4 committed Dec 18, 2024
1 parent 1d8f07e commit dea1186
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 22 deletions.
13 changes: 6 additions & 7 deletions src/ontology/mondo-ingest.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -594,11 +594,9 @@ tmp/%-synonyms-scope-type-xref.tsv: $(COMPONENTSDIR)/%.owl
../../tests/input/sync_synonym/%-synonyms-scope-type-xref.tsv:
$(ROBOT) query -i ../../tests/input/sync_synonym/test_$*.owl --query ../sparql/synonyms-scope-type-xref.sparql $@

# todo: this output exists for analysis during development; we may remove it later. At the end, remove it and its usages.
INPUT_FILES := $(wildcard tmp/synonym_sync_combined_cases_*.tsv)
$(SYN_SYNC_DIR)/synonym_sync_combined_cases.robot.tsv: $(foreach n,$(ALL_COMPONENT_IDS), $(SYN_SYNC_DIR)/$(n)-synonyms.added.robot.tsv)
head -n 2 $(firstword $(INPUT_FILES)) > $@
for file in $(INPUT_FILES); do \
$(SYN_SYNC_DIR)/synonym_sync_combined_cases.robot.tsv: $(foreach n,$(ALL_COMPONENT_IDS), $(TMPDIR)/synonym_sync_combined_cases_$(n).tsv)
head -n 2 $< > $@
for file in $^; do \
tail -n +3 $$file >> $@; \
done

Expand All @@ -611,7 +609,7 @@ $(SYN_SYNC_DIR)/sync-synonyms.confirmed.robot.tsv: $(foreach n,$(ALL_COMPONENT_I
$(SYN_SYNC_DIR)/sync-synonyms.updated.robot.tsv: $(foreach n,$(ALL_COMPONENT_IDS), $(SYN_SYNC_DIR)/$(n)-synonyms.updated.robot.tsv)
awk '(NR == 1) || (NR == 2) || (FNR > 2)' $(SYN_SYNC_DIR)/*.synonyms.updated.robot.tsv > $@

$(SYN_SYNC_DIR)/%-synonyms.added.robot.tsv $(SYN_SYNC_DIR)/%-synonyms.confirmed.robot.tsv $(SYN_SYNC_DIR)/%-synonyms.updated.robot.tsv: $(TMPDIR)/mondo.sssom.tsv $(COMPONENTSDIR)/%.db metadata/%.yml tmp/mondo-synonyms-scope-type-xref.tsv tmp/%-synonyms-scope-type-xref.tsv | $(SYN_SYNC_DIR)
$(SYN_SYNC_DIR)/%-synonyms.added.robot.tsv $(SYN_SYNC_DIR)/%-synonyms.confirmed.robot.tsv $(SYN_SYNC_DIR)/%-synonyms.updated.robot.tsv $(TMPDIR)/synonym_sync_combined_cases_%.tsv: $(TMPDIR)/mondo.sssom.tsv $(COMPONENTSDIR)/%.db metadata/%.yml tmp/mondo-synonyms-scope-type-xref.tsv tmp/%-synonyms-scope-type-xref.tsv | $(SYN_SYNC_DIR)
python3 $(SCRIPTSDIR)/sync_synonym.py \
--mondo-mappings-path $(TMPDIR)/mondo.sssom.tsv \
--ontology-db-path $(COMPONENTSDIR)/$*.db \
Expand All @@ -621,7 +619,8 @@ $(SYN_SYNC_DIR)/%-synonyms.added.robot.tsv $(SYN_SYNC_DIR)/%-synonyms.confirmed.
--onto-config-path metadata/$*.yml \
--outpath-added $(SYN_SYNC_DIR)/$*.synonyms.added.robot.tsv \
--outpath-confirmed $(SYN_SYNC_DIR)/$*.synonyms.confirmed.robot.tsv \
--outpath-updated $(SYN_SYNC_DIR)/$*.synonyms.updated.robot.tsv
--outpath-updated $(SYN_SYNC_DIR)/$*.synonyms.updated.robot.tsv \
--outpath-combined $(TMPDIR)/synonym_sync_combined_cases_$*.tsv

##################################
## Externally managed content ####
Expand Down
26 changes: 11 additions & 15 deletions src/scripts/sync_synonym.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from src.scripts.utils import PREFIX_MAP, get_owned_prefix_map


# todo: when combined_cases_df no longer necessary, remove 'case'
HEADERS_TO_ROBOT_SUBHEADERS = {
'mondo_id': 'ID',
'mondo_label': '',
Expand Down Expand Up @@ -204,17 +203,14 @@ def sync_synonyms(
ontology_db_path: Union[Path, str], mondo_synonyms_path: Union[Path, str],
mondo_exclusion_configs: Union[Path, str], onto_synonym_types_path: Union[Path, str],
mondo_mappings_path: Union[Path, str], onto_config_path: Union[Path, str], outpath_added: Union[Path, str],
outpath_confirmed: Union[Path, str], outpath_updated: Union[Path, str], outpath_deleted: Union[Path, str] = None,
combined_outpath_template_str='tmp/synonym_sync_combined_cases_{}.tsv'
outpath_confirmed: Union[Path, str], outpath_updated: Union[Path, str],
outpath_combined: Union[Path, str], outpath_deleted: Union[Path, str] = None,
):
"""Create outputs for syncing synonyms between Mondo and its sources.
todo: update when -deleted is reactivated
:param outpath_deleted: Optional. This case isn't fully fleshed out yet.
todo: if we decide that this param should stay, set it as a required CLI/functional param w/ no default value.
:param combined_outpath_template_str: Creates an additional file concatenating all case files.
todo: possible refactor: labels: Maybe could be done more cleanly and consistently. At first, wanted to add to both
source_df and mondo_df, but this caused _x and _y cols during joins, or I would have to join on those cols as well.
So I arbitrarily chose mondo_df. This is fine in all cases but -added, where they're added in a custom way.
Expand Down Expand Up @@ -387,15 +383,12 @@ def sync_synonyms(
deleted_df['case'] = 'deleted'

# Write outputs
# todo: temp: combined_cases_df: combine all cases for analysis during development
if combined_outpath_template_str:
combined_cases_df = pd.concat([confirmed_df, added_df, updated_df, deleted_df], ignore_index=True)\
.fillna('')
combined_cases_outpath = str(combined_outpath_template_str).format(source_name)
combined_cases_df = _common_operations(combined_cases_df, combined_cases_outpath, df_is_combined=True)
combined_cases_df['source'] = source_name
combined_cases_df = pd.concat([pd.DataFrame([HEADERS_TO_ROBOT_SUBHEADERS]), combined_cases_df])
combined_cases_df.to_csv(combined_cases_outpath, sep='\t', index=False)
combined_cases_df = pd.concat([confirmed_df, added_df, updated_df, deleted_df], ignore_index=True)\
.fillna('')
combined_cases_df = _common_operations(combined_cases_df, outpath_combined, df_is_combined=True)
combined_cases_df['source'] = source_name
combined_cases_df = pd.concat([pd.DataFrame([HEADERS_TO_ROBOT_SUBHEADERS]), combined_cases_df])
combined_cases_df.to_csv(outpath_combined, sep='\t', index=False)


def cli():
Expand Down Expand Up @@ -441,6 +434,9 @@ def cli():
'-u', '--outpath-updated', required=True,
help='Path to ROBOT template TSV to create which will contain updates to synonym scope predicate; cases where '
'the synonym exists in Mondo and on the mapped source term, but the scope predicate is different.')
parser.add_argument(
'-b', '--outpath-combined', required=True,
help='Path to curation file which is a concatenation of all cases.')
sync_synonyms(**vars(parser.parse_args()))


Expand Down

0 comments on commit dea1186

Please sign in to comment.