diff --git a/discovery/model/schema.py b/discovery/model/schema.py index 74d3ae12..e7c82528 100644 --- a/discovery/model/schema.py +++ b/discovery/model/schema.py @@ -9,8 +9,16 @@ import functools from datetime import datetime -from elasticsearch_dsl import Boolean, Date, InnerDoc, Integer, Keyword, Object, Text -from elasticsearch_dsl import Index as ESIndex +from elasticsearch_dsl import ( + Boolean, + Date, + Index as ESIndex, + InnerDoc, + Integer, + Keyword, + Object, + Text, +) from elasticsearch_dsl.exceptions import ValidationException from .common import DiscoveryDoc, DiscoveryMeta, DiscoveryUserDoc @@ -23,7 +31,6 @@ def mergeDict(d1, d2): class SchemaMeta(DiscoveryMeta): - url = Keyword(required=True) # timestamp = Date() # when this document is updated last_updated = Date() @@ -32,7 +39,6 @@ class SchemaMeta(DiscoveryMeta): class SchemaStatusMeta(InnerDoc): - refresh_status = Integer() refresh_ts = Date() refresh_msg = Text(index=False) @@ -74,7 +80,6 @@ class Index: "number_of_replicas": 0, } - def update_index_meta(self, meta): allowed_keys = {"_meta"} if isinstance(meta, dict) and len(set(meta) - allowed_keys) == 0: @@ -171,7 +176,11 @@ class SchemaClass(DiscoveryDoc): uri = Text(fields={"raw": Keyword()}) parent_classes = Text(multi=True, analyzer="simple") # immediate ones only properties = Object(SchemaClassProp) # immediate ones only - validation = Object(enabled=False) + validation = Object( + dynamic=False, # only index fields listed + # indexing fields, validation.$schema(.raw) & validation.type, to allow filter/query on + properties={"$schema": Text(fields={"raw": Keyword()}), "type": Keyword()}, + ) # nested properties for filter ref = Boolean() # not defined in this schema class Index: @@ -186,6 +195,5 @@ class Index: } def save(self, **kwargs): - self.meta.id = f"{self.namespace}::{self.prefix}:{self.label}" return super().save(**kwargs) diff --git a/discovery/pipeline.py b/discovery/pipeline.py index 8f21b8dc..9ad96e30 100644 --- 
a/discovery/pipeline.py
+++ b/discovery/pipeline.py
@@ -13,35 +13,51 @@ class DiscoveryQueryBuilder(ESQueryBuilder):
     def default_string_query(self, q, options):
+        """Build the default search for a free-text query string.
+
+        After stripping surrounding whitespace, a query that contains ``:``,
+        `` AND `` or `` OR `` is treated as Elasticsearch query-string syntax
+        and handed to ``query_string``; anything else gets the boosted
+        ``dis_max``/``function_score`` query. Returns the resulting search.
+        """
         search = Search()
-        search = search.update_from_dict(
-            {
-                "query": {
-                    "function_score": {
-                        "query": {
-                            "dis_max": {
-                                "queries": [
-                                    {"term": {"_id": {"value": q, "boost": 15.0}}},
-                                    {"term": {"label.raw": {"value": q, "boost": 10.0}}},
-                                    {"term": {"_meta.username": {"value": q}}},  # for dataset
-                                    {"term": {"name": {"value": q}}},
-                                    {"match": {"parent_classes": {"query": q}}},
-                                    {"prefix": {"label": {"value": q}}},
-                                    {"query_string": {"query": q}},
-                                ]
-                            }
-                        },
-                        "functions": [
-                            {"filter": {"term": {"namespace": "schema"}}, "weight": 0.5},
-                            {"filter": {"term": {"prefix.raw": "schema"}}, "weight": 0.5},
-                            {
-                                "filter": {"match": {"parent_classes": "bts:BiologicalEntity"}},
-                                "weight": 1.5,
+        q = q.strip()
+
+        # Check for other elasticsearch query string syntax
+        if ":" in q or " AND " in q or " OR " in q:
+            search = search.query("query_string", query=q)
+        else:
+            # Update the search with the constructed query
+            search = search.update_from_dict(
+                {
+                    "query": {
+                        "function_score": {
+                            "query": {
+                                "dis_max": {
+                                    "queries": [
+                                        {"term": {"_id": {"value": q, "boost": 15.0}}},
+                                        {"term": {"label.raw": {"value": q, "boost": 10.0}}},
+                                        {"term": {"_meta.username": {"value": q}}},
+                                        {"term": {"name": {"value": q}}},
+                                        {"match": {"parent_classes": {"query": q}}},
+                                        {"prefix": {"label": {"value": q}}},
+                                        {"query_string": {"query": q}},
+                                    ]
+                                }
                             },
-                        ],
-                    }
+                            "functions": [
+                                {"filter": {"term": {"namespace": "schema"}}, "weight": 0.5},
+                                {"filter": {"term": {"prefix.raw": "schema"}}, "weight": 0.5},
+                                {
+                                    "filter": {
+                                        "match": {"parent_classes": "bts:BiologicalEntity"}
+                                    },
+                                    "weight": 1.5,
+                                },
+                            ],
+                        }
+                    },
                 }
-            }
-        )
+            )
+        return search