Skip to content

Commit

Permalink
fix: DB feature querying logic
Browse files Browse the repository at this point in the history
  • Loading branch information
davidlougheed committed Dec 12, 2024
1 parent df7d9f4 commit badf282
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 5 deletions.
14 changes: 10 additions & 4 deletions bento_reference_service/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def deserialize_genome_feature(rec: asyncpg.Record) -> GenomeFeature:
feature_name=rec["feature_name"],
feature_type=rec["feature_type"],
source=rec["source"],
- entries=tuple(map(Database.deserialize_genome_feature_entry, json.loads(rec["entries"] or "[]"))),
+ entries=list(map(Database.deserialize_genome_feature_entry, json.loads(rec["entries"] or "[]"))),
gene_id=rec["gene_nat_id"],
attributes=json.loads(rec["attributes"] or "{}"),
parents=tuple(rec["parents"] or ()), # tuple of parent IDs
Expand Down Expand Up @@ -369,8 +369,12 @@ def _q_param(pv: str | int) -> str:
return f"${len(q_params) + 3}" # plus 3: g_id, offset, limit at start

if q:
- query_param = _q_param(q)
- q_op = "%" if q_fzy else "~"
+ if q_fzy:
+ q_op = "%"
+ query_param = _q_param(f"%{q}%")
+ else:
+ q_op = "~"
+ query_param = _q_param(q)
gf_where_items.append(
f"""
gf.feature_id IN (
Expand All @@ -397,8 +401,9 @@ def _q_param(pv: str | int) -> str:
if name:
param = _q_param(name)
if name_fzy:
+ param_fzy = _q_param(f"%{name}%")
  gf_select_items.append(f"similarity(gf.feature_name, {param}) gf_fn_sml")
- gf_where_items.append(f"gf.feature_name % {param}")
+ gf_where_items.append(f"gf.feature_name % {param_fzy}")
gf_order_items.append("gf_fn_sml DESC")
else:
gf_where_items.append(f"gf.feature_name = {param}")
Expand Down Expand Up @@ -524,6 +529,7 @@ async def bulk_ingest_genome_features(self, features: tuple[GenomeFeature, ...])

feature_types.add((feature.feature_type,))

+ e: GenomeFeatureEntry
entries.extend(
(
row_id,
Expand Down
2 changes: 1 addition & 1 deletion bento_reference_service/routers/refget.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ class RefGetSequenceMetadata(BaseModel):
md5: str
ga4gh: str
length: int
- aliases: list[Alias]
+ aliases: tuple[Alias, ...]


class RefGetSequenceMetadataResponse(BaseModel):
Expand Down
1 change: 1 addition & 0 deletions tests/test_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ async def test_genome_features_summary(db: Database, db_cleanup):
(SARS_COV_2_GENOME_ID, dict(q="ORF1ab"), 3),
(SARS_COV_2_GENOME_ID, dict(q="ENSSASG00005000002"), 1),
(SARS_COV_2_GENOME_ID, dict(q="protein_coding", q_fzy=True, limit=100), 24),
+ (SARS_COV_2_GENOME_ID, dict(q="tein_cod", q_fzy=True, limit=100), 24),
# hg38 subset
(HG38_CHR1_F100K_GENOME_ID, dict(position="chr1:11869-"), 3),
(HG38_CHR1_F100K_GENOME_ID, dict(start=12000), 10),
Expand Down

0 comments on commit badf282

Please sign in to comment.