diff --git a/backend/src/monarch_py/api/config.py b/backend/src/monarch_py/api/config.py index bc09c20f5..763c5848b 100644 --- a/backend/src/monarch_py/api/config.py +++ b/backend/src/monarch_py/api/config.py @@ -11,6 +11,7 @@ class Settings(BaseSettings): solr_host = os.getenv("SOLR_HOST") if os.getenv("SOLR_HOST") else "127.0.0.1" solr_port = os.getenv("SOLR_PORT") if os.getenv("SOLR_PORT") else 8983 solr_url = os.getenv("SOLR_URL") if os.getenv("SOLR_URL") else f"http://{solr_host}:{solr_port}/solr" + phenio_db_path = os.getenv("PHENIO_DB_PATH") if os.getenv("PHENIO_DB_PATH") else "/data/phenio.db" settings = Settings() @@ -23,4 +24,5 @@ def solr(): @lru_cache(maxsize=1) def oak(): - return OakImplementation().init_semsim() + phenio_db_path = settings.phenio_db_path if os.path.exists(settings.phenio_db_path) else None + return OakImplementation().init_semsim(phenio_path=phenio_db_path) diff --git a/backend/src/monarch_py/implementations/oak/oak_implementation.py b/backend/src/monarch_py/implementations/oak/oak_implementation.py index 405f61573..a1b1b5451 100644 --- a/backend/src/monarch_py/implementations/oak/oak_implementation.py +++ b/backend/src/monarch_py/implementations/oak/oak_implementation.py @@ -8,6 +8,7 @@ from oaklib.interfaces.semsim_interface import SemanticSimilarityInterface from oaklib.selector import get_adapter from linkml_runtime.dumpers.json_dumper import JSONDumper +import pystow @dataclass @@ -18,16 +19,28 @@ class OakImplementation(SemanticSimilarityInterface): json_dumper = JSONDumper() default_predicates = ["rdfs:subClassOf", "BFO:0000050", "UPHENO:0000001"] - def init_semsim(self): + default_phenio_db_url = "https://data.monarchinitiative.org/monarch-kg-dev/latest/phenio.db.gz" + + def init_semsim(self, phenio_path: str = None, force_update: bool = False): if self.semsim is None: logger.info("Warming up semsimian") start = time.time() # self.semsim = get_adapter(f"sqlite:obo:phenio") - logger.debug("Getting semsimian adapter") - self.semsim = get_adapter(f"semsimian:sqlite:obo:phenio") - # for some reason, we need to run a query to get the adapter - # to initialize properly + + if phenio_path: + logger.debug(f"Creating semsimian adapter using phenio_path at {phenio_path}") + self.semsim = get_adapter(f"semsimian:sqlite:{phenio_path}") + else: + monarchstow = pystow.module("monarch") + + with monarchstow.ensure_gunzip( + "phenio", url=self.default_phenio_db_url, force=force_update + ) as stowed_phenio_path: + logger.debug(f"Creating semsimian adapter using pystow at {stowed_phenio_path}") + self.semsim = get_adapter(f"semsimian:sqlite:{stowed_phenio_path}") + + # run a query to get the adapter to initialize properly logger.debug("Running query to initialize adapter") self.semsim.termset_pairwise_similarity( subjects=["MP:0010771"],