Skip to content

Commit

Permalink
Load phenio.db from /data if available, use pystow to fetch from monarch data bucket if not (#356)
Browse files Browse the repository at this point in the history

This should stop our multiple pystow download problem (and mean no
download on startup at all, happily)
  • Loading branch information
kevinschaper authored Sep 29, 2023
1 parent b04dbb1 commit 9298243
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 6 deletions.
4 changes: 3 additions & 1 deletion backend/src/monarch_py/api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class Settings(BaseSettings):
solr_host = os.getenv("SOLR_HOST") if os.getenv("SOLR_HOST") else "127.0.0.1"
solr_port = os.getenv("SOLR_PORT") if os.getenv("SOLR_PORT") else 8983
solr_url = os.getenv("SOLR_URL") if os.getenv("SOLR_URL") else f"http://{solr_host}:{solr_port}/solr"
phenio_db_path = os.getenv("PHENIO_DB_PATH") if os.getenv("PHENIO_DB_PATH") else "/data/phenio.db"


settings = Settings()
Expand All @@ -23,4 +24,5 @@ def solr():

@lru_cache(maxsize=1)
def oak():
    """Build the OAK implementation and initialise its semsim adapter.

    The call is memoised with ``lru_cache(maxsize=1)``, so the body runs
    only on the first call and later calls reuse the cached result.

    ``settings.phenio_db_path`` is forwarded to ``init_semsim`` only when
    that path exists on disk; otherwise ``None`` is passed so that
    ``init_semsim`` takes its non-local branch instead.

    Returns:
        Whatever ``OakImplementation.init_semsim`` returns.
    """
    # Only hand over the configured path when it is actually present;
    # a missing file must not be passed through as if it were usable.
    phenio_db_path = settings.phenio_db_path if os.path.exists(settings.phenio_db_path) else None
    return OakImplementation().init_semsim(phenio_path=phenio_db_path)
23 changes: 18 additions & 5 deletions backend/src/monarch_py/implementations/oak/oak_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from oaklib.interfaces.semsim_interface import SemanticSimilarityInterface
from oaklib.selector import get_adapter
from linkml_runtime.dumpers.json_dumper import JSONDumper
import pystow


@dataclass
Expand All @@ -18,16 +19,28 @@ class OakImplementation(SemanticSimilarityInterface):
json_dumper = JSONDumper()
default_predicates = ["rdfs:subClassOf", "BFO:0000050", "UPHENO:0000001"]

def init_semsim(self):
default_phenio_db_url = "https://data.monarchinitiative.org/monarch-kg-dev/latest/phenio.db.gz"

def init_semsim(self, phenio_path: str = None, force_update: bool = False):
if self.semsim is None:
logger.info("Warming up semsimian")
start = time.time()
# self.semsim = get_adapter(f"sqlite:obo:phenio")
logger.debug("Getting semsimian adapter")
self.semsim = get_adapter(f"semsimian:sqlite:obo:phenio")

# for some reason, we need to run a query to get the adapter
# to initialize properly

if phenio_path:
logger.debug(f"Creating semsimian adapter using phenio_path at {phenio_path}")
self.semsim = get_adapter(f"semsimian:sqlite:{phenio_path}")
else:
monarchstow = pystow.module("monarch")

with monarchstow.ensure_gunzip(
"phenio", url=self.default_phenio_db_url, force=force_update
) as stowed_phenio_path:
logger.debug(f"Creating semsimian adapter using pystow at {stowed_phenio_path}")
self.semsim = get_adapter(f"semsimian:sqlite:{stowed_phenio_path}")

# run a query to get the adapter to initialize properly
logger.debug("Running query to initialize adapter")
self.semsim.termset_pairwise_similarity(
subjects=["MP:0010771"],
Expand Down

0 comments on commit 9298243

Please sign in to comment.