Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

task/WP-164 Implement Workspace Search #886

Merged
merged 6 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions server/portal/apps/projects/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
list_projects, get_project, create_shared_workspace,\
update_project, get_workspace_role, change_user_role, add_user_to_workspace,\
remove_user, transfer_ownership

from portal.apps.search.tasks import tapis_project_listing_indexer
from portal.libs.elasticsearch.indexes import IndexedProject
from elasticsearch_dsl import Q

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -63,12 +65,35 @@ def get(self, request):
}
```
"""
# TODOv3: Support Elasticsearch queries for V3 projects https://jira.tacc.utexas.edu/browse/TV3-160
# query_string = request.GET.get('query_string')
# offset = int(request.GET.get('offset', 0))
# limit = int(request.GET.get('limit', 100))
client = request.user.tapis_oauth.client
listing = list_projects(client)

query_string = request.GET.get('query_string')
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 100))

listing = []

if query_string:
search = IndexedProject.search()

ngram_query = Q("query_string", query=query_string,
fields=["title"],
minimum_should_match='100%',
default_operator='or')

wildcard_query = Q("wildcard", title=f'*{query_string}*')

search = search.query(ngram_query | wildcard_query)
search = search.extra(from_=int(offset), size=int(limit))

res = search.execute()
hits = [hit.to_dict() for hit in res]
listing = hits
else:
client = request.user.tapis_oauth.client
listing = list_projects(client)

tapis_project_listing_indexer.delay(listing)

return JsonResponse({"status": 200, "response": listing})

def post(self, request): # pylint: disable=no-self-use
Expand Down
7 changes: 6 additions & 1 deletion server/portal/apps/search/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from django.conf import settings
from celery import shared_task
from portal.libs.agave.utils import user_account, service_account
from portal.libs.elasticsearch.utils import index_listing
from portal.libs.elasticsearch.utils import index_listing, index_project_listing
from portal.apps.users.utils import get_tas_allocations
from portal.apps.projects.models.metadata import ProjectMetadata
from portal.libs.elasticsearch.docs.base import (IndexedAllocation,
Expand Down Expand Up @@ -78,3 +78,8 @@ def index_project(self, project_id):
project_doc = IndexedProject(**project_dict)
project_doc.meta.id = project_id
project_doc.save()


@shared_task(bind=True, max_retries=3, queue='default')
def tapis_project_listing_indexer(self, projects):
    """Celery task that forwards a project listing to the project indexer.

    The listing is passed through unchanged to ``index_project_listing``.
    The bound task instance (``self``) is not used by this body.

    :param projects: iterable of project records to index; each record is
        handed to ``index_project_listing`` as-is.
    """
    index_project_listing(projects)
38 changes: 11 additions & 27 deletions server/portal/libs/elasticsearch/docs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,37 +17,21 @@


class IndexedProject(Document):
id = Keyword()
title = Text(fields={'_exact': Keyword()})
description = Text()
created = Date()
lastModified = Date()
projectId = Keyword()
path = Text()
name = Text()
host = Text()
owner = Object(
properties={
'username': Keyword(),
'fullName': Text()
}
)
pi = Object(
properties={
'username': Keyword(),
'fullName': Text()
}
)
coPIs = Object(
multi=True,
properties={
'username': Keyword(),
'fullName': Text()
}
)
teamMembers = Object(
multi=True,
properties={
'username': Keyword(),
'fullName': Text()
}
properties={
'username': Keyword(),
'firstName': Text(),
'lastName': Text(),
'email': Text()
}
)
updated = Date()

@classmethod
def from_id(cls, projectId):
Expand Down
24 changes: 24 additions & 0 deletions server/portal/libs/elasticsearch/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,8 @@ def index_listing(files):
file_dict = dict(_file)
if file_dict['name'][0] == '.':
continue
if not file_dict['path'].startswith('/'):
file_dict['path'] = '/' + file_dict['path']
file_dict['lastUpdated'] = current_time()
file_dict['basePath'] = os.path.dirname(file_dict['path'])
file_uuid = file_uuid_sha256(file_dict['system'], file_dict['path'])
Expand All @@ -219,3 +221,25 @@ def index_listing(files):
})

bulk(client, ops)


def index_project_listing(projects):
    """Bulk-upsert a listing of projects into the ``IndexedProject`` index.

    Each project is copied into a plain dict, stamped with an ``updated``
    value from ``current_time()``, and sent as an ``update`` action with
    ``doc_as_upsert`` enabled. The document id is whatever
    ``get_sha256_hash`` returns for the project's ``id`` value.

    :param projects: iterable of project records; each must be convertible
        with ``dict(...)`` and contain an ``'id'`` key.
    """
    # Imported at call time rather than module level -- presumably to avoid
    # a circular import between the docs and utils modules (TODO confirm).
    from portal.libs.elasticsearch.docs.base import IndexedProject

    def _upsert_action(project, index_name):
        # Work on a copy so the caller's record is not mutated.
        document = dict(project)
        document['updated'] = current_time()
        return {
            '_index': index_name,
            '_id': get_sha256_hash(document['id']),
            'doc': document,
            '_op_type': 'update',
            'doc_as_upsert': True,
        }

    target_index = IndexedProject.Index.name
    es_client = get_connection('default')
    actions = [_upsert_action(entry, target_index) for entry in projects]

    bulk(es_client, actions)