From 09c939822a23911e2332964c68d0c2e816c6d196 Mon Sep 17 00:00:00 2001 From: michael-lewis Date: Sat, 8 Jul 2023 14:18:33 +0100 Subject: [PATCH] Added filter to exclude content chunks from API query, for #99 Implement vector search --- src/web/content/dynamic/searchmysite/solr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/web/content/dynamic/searchmysite/solr.py b/src/web/content/dynamic/searchmysite/solr.py index 21077e6..6adf004 100644 --- a/src/web/content/dynamic/searchmysite/solr.py +++ b/src/web/content/dynamic/searchmysite/solr.py @@ -122,7 +122,9 @@ random_result_step3_get_doc_from_domain = 'select?q=*%3A*&rows=1&start={}&fq=domain%3A{}' + query_filter_content_type # 5. API query -solrquery = 'select?fl=id,url,title,author,description,tags,page_type,page_last_modified,published_date,language,indexed_inlinks,indexed_outlinks&q={}&start={}&rows={}&wt=json&fq=domain%3A{}&hl=on&hl.fl=content&hl.simple.pre={}&hl.simple.post={}' +# &fq=!relationship%3Achild added to ensure only parent pages are returned, i.e. not the content chunks used for embedding +# (can't use fq=relationship%3Aparent because not all pages will have a value for relationship initially) +solrquery = 'select?fl=id,url,title,author,description,tags,page_type,page_last_modified,published_date,language,indexed_inlinks,indexed_outlinks&q={}&start={}&rows={}&wt=json&fq=domain%3A{}&fq=!relationship%3Achild&hl=on&hl.fl=content&hl.simple.pre={}&hl.simple.post={}' # Solr update queries