From fdc161b1a5c1aab44b8a42da833755946bd0464a Mon Sep 17 00:00:00 2001 From: Chris Allan Date: Thu, 7 Dec 2023 18:18:52 +0000 Subject: [PATCH 1/8] First pass at large FileAnnotation count performance (See gh-517) Refactors the stack which produces the list of existing FileAnnotations for selection in the "Choose attachments" dialog. --- omeroweb/webclient/controller/container.py | 139 ++++++++++++------ .../webclient/annotations/files_form.html | 2 +- omeroweb/webclient/views.py | 6 +- 3 files changed, 99 insertions(+), 48 deletions(-) diff --git a/omeroweb/webclient/controller/container.py b/omeroweb/webclient/controller/container.py index 7412506a84..f8ba102934 100644 --- a/omeroweb/webclient/controller/container.py +++ b/omeroweb/webclient/controller/container.py @@ -24,10 +24,11 @@ # import omero -from omero.rtypes import rstring, rlong, unwrap +from omero.rtypes import rstring, rlist, rlong, unwrap from django.utils.encoding import smart_str import logging +from omero.model import FileAnnotationI from omeroweb.webclient.controller import BaseController from omeroweb.webgateway.views import _bulk_file_annotations @@ -78,7 +79,7 @@ def __init__( annotation=None, index=None, orphaned=None, - **kw + **kw, ): BaseController.__init__(self, conn) if project is not None: @@ -516,64 +517,114 @@ def canUseOthersAnns(self): return True return False + class FileAnnotationShim: + """ + Duck type which loosely mimics the structure that + AnnotationQuerySetIterator is expecting to be able to yield + FileAnnotation.id and FileAnnotation.file.name. + """ + + def __init__(self, _id, name): + self._obj = FileAnnotationI() + self.id = unwrap(_id) + self.name = unwrap(name) + + def getFileName(self): + return self.name + + FILES_BY_OBJECT_QUERY = ( + "SELECT {columns} FROM FileAnnotation AS fa " + "JOIN fa.file AS ofile " + " WHERE NOT EXISTS ( " + " SELECT 1 FROM {parent_type}AnnotationLink sa_link " + " WHERE sa_link.parent.id in (:ids) " + " AND fa.id = sa_link.child.id " + " AND fa.ns not in (:ns_to_exclude) " + " {owned_by_me} " + " GROUP BY sa_link.child.id " + " HAVING count(sa_link.id) >= :count " + " ) " + " {order_by}" + ) + def getFilesByObject(self, parent_type=None, parent_ids=None): - eid = ( + me = ( (not self.canUseOthersAnns()) and self.conn.getEventContext().userId or None ) - ns = [ - omero.constants.namespaces.NSCOMPANIONFILE, - omero.constants.namespaces.NSEXPERIMENTERPHOTO, - ] - - def sort_file_anns(file_ann_gen): - file_anns = list(file_ann_gen) - try: - file_anns.sort(key=lambda x: x.getFile().getName().lower()) - except Exception: - pass - return file_anns + ns_to_exclude = rlist( + [ + rstring(omero.constants.namespaces.NSCOMPANIONFILE), + rstring(omero.constants.namespaces.NSEXPERIMENTERPHOTO), + ] + ) if self.image is not None: - return sort_file_anns( - self.image.listOrphanedAnnotations(eid=eid, ns=ns, anntype="File") - ) + parent_ids = [self.image.getId()] + parent_type = "Image" elif self.dataset is not None: - return sort_file_anns( - self.dataset.listOrphanedAnnotations(eid=eid, ns=ns, anntype="File") - ) + parent_ids = [self.dataset.getId()] + parent_type = "Dataset" elif self.project is not None: - return sort_file_anns( - self.project.listOrphanedAnnotations(eid=eid, ns=ns, anntype="File") - ) + parent_ids = [self.project.getId()] + parent_type = "Project" elif self.well is not None: - return sort_file_anns( - self.well.getWellSample() - .image() - .listOrphanedAnnotations(eid=eid, ns=ns, anntype="File") - ) + parent_ids = [self.well.getId()] + parent_type = "Well" elif self.plate is not None: - return sort_file_anns( - self.plate.listOrphanedAnnotations(eid=eid, ns=ns, anntype="File") - ) + parent_ids = [self.plate.getId()] + parent_type = "Plate" elif self.screen is not None: - return sort_file_anns( - self.screen.listOrphanedAnnotations(eid=eid, ns=ns, anntype="File") - ) + parent_ids = [self.screen.getId()] + parent_type = "Screen" elif self.acquisition is not None: - return sort_file_anns( - self.acquisition.listOrphanedAnnotations(eid=eid, ns=ns, anntype="File") - ) + parent_ids = [self.acquisition.getId()] + parent_type = "PlateAcqusition" elif parent_type and parent_ids: parent_type = parent_type.title() if parent_type == "Acquisition": parent_type = "PlateAcquisition" - return sort_file_anns( - self.conn.listOrphanedAnnotations( - parent_type, parent_ids, eid=eid, ns=ns, anntype="File" - ) - ) else: - return sort_file_anns(self.conn.listFileAnnotations(eid=eid)) + raise ValueError("No context provided!") + + q = self.conn.getQueryService() + params = omero.sys.ParametersI() + # Count the total number of FileAnnotations that would match the + # full query below + params.addIds(parent_ids) + params.map["ns_to_exclude"] = ns_to_exclude + params.addLong("count", len(parent_ids)) + owned_by_me = "" + if me: + owned_by_me = "AND sa_link.details.owner.id = :me" + params.addLong("me", me) + columns = "count(*)" + query = self.FILES_BY_OBJECT_QUERY.format( + columns=columns, + parent_type=parent_type, + owned_by_me=owned_by_me, + order_by="", + ) + (row,) = q.projection(query, params, self.conn.SERVICE_OPTS) + (total_files,) = unwrap(row) + + # Perform the full query and limit the results so that we don't get + # overwhelmed + params.page(0, 100) # offset, limit + columns = "fa.id, ofile.name" + order_by = "ORDER BY fa.details.updateEvent.time DESC" + query = self.FILES_BY_OBJECT_QUERY.format( + columns=columns, + parent_type=parent_type, + owned_by_me=owned_by_me, + order_by=order_by, + ) + rows = q.projection(query, params, self.conn.SERVICE_OPTS) + + logger.warn(f"TOTAL FILES: {total_files}") + return ( + total_files, + [self.FileAnnotationShim(_id, name) for (_id, name) in rows], + ) #################################################################### # Creation diff --git a/omeroweb/webclient/templates/webclient/annotations/files_form.html b/omeroweb/webclient/templates/webclient/annotations/files_form.html index 890d4de9a7..07c0e8eff2 100644 --- a/omeroweb/webclient/templates/webclient/annotations/files_form.html +++ b/omeroweb/webclient/templates/webclient/annotations/files_form.html @@ -60,7 +60,7 @@

{% trans "Upload a new file" %}:

And/or attach an existing File:

- Select the attachments to add + Select the attachments to add (total: {{ total_files }})
{{ form_file.files }}
diff --git a/omeroweb/webclient/views.py b/omeroweb/webclient/views.py index 560f721731..e99a3127a4 100755 --- a/omeroweb/webclient/views.py +++ b/omeroweb/webclient/views.py @@ -2385,14 +2385,14 @@ def annotate_file(request, conn=None, **kwargs): return handlerInternalError(request, x) if manager is not None: - files = manager.getFilesByObject() + total_files, files = manager.getFilesByObject() else: manager = BaseContainer(conn) for dtype, objs in oids.items(): if len(objs) > 0: # NB: we only support a single data-type now. E.g. 'image' OR # 'dataset' etc. - files = manager.getFilesByObject( + total_files, files = manager.getFilesByObject( parent_type=dtype, parent_ids=[o.getId() for o in objs] ) break @@ -2421,7 +2421,7 @@ def annotate_file(request, conn=None, **kwargs): else: form_file = FilesAnnotationForm(initial=initial) - context = {"form_file": form_file} + context = {"form_file": form_file, "total_files": total_files} template = "webclient/annotations/files_form.html" context["template"] = template return context From 0c85975dd7704f14d72b1a884c274516f99fd110 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Fri, 8 Dec 2023 15:32:07 +0100 Subject: [PATCH 2/8] Update deprecated warning logging --- omeroweb/webclient/controller/container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/omeroweb/webclient/controller/container.py b/omeroweb/webclient/controller/container.py index f8ba102934..f5b90f728e 100644 --- a/omeroweb/webclient/controller/container.py +++ b/omeroweb/webclient/controller/container.py @@ -620,7 +620,7 @@ def getFilesByObject(self, parent_type=None, parent_ids=None): ) rows = q.projection(query, params, self.conn.SERVICE_OPTS) - logger.warn(f"TOTAL FILES: {total_files}") + logger.warning(f"TOTAL FILES: {total_files}") return ( total_files, [self.FileAnnotationShim(_id, name) for (_id, name) in rows], From a958acf35439af84c3bedbfcaf6b83dafa6fecf1 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Tue, 12 Dec 2023 10:19:45 +0100 Subject: [PATCH 3/8] Sort files by name, then by descending ID --- omeroweb/webclient/controller/container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/omeroweb/webclient/controller/container.py b/omeroweb/webclient/controller/container.py index f5b90f728e..794ebb505d 100644 --- a/omeroweb/webclient/controller/container.py +++ b/omeroweb/webclient/controller/container.py @@ -611,7 +611,7 @@ def getFilesByObject(self, parent_type=None, parent_ids=None): # overwhelmed params.page(0, 100) # offset, limit columns = "fa.id, ofile.name" - order_by = "ORDER BY fa.details.updateEvent.time DESC" + order_by = "ORDER BY ofile.name, fa.id DESC" query = self.FILES_BY_OBJECT_QUERY.format( columns=columns, parent_type=parent_type, From aa3e827615d5bb0ae2908149607e2e943145bb07 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Tue, 12 Dec 2023 10:27:28 +0100 Subject: [PATCH 4/8] Increase page size to 500 --- omeroweb/webclient/controller/container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/omeroweb/webclient/controller/container.py b/omeroweb/webclient/controller/container.py index 794ebb505d..d421153024 100644 --- a/omeroweb/webclient/controller/container.py +++ b/omeroweb/webclient/controller/container.py @@ -609,7 +609,7 @@ def getFilesByObject(self, parent_type=None, parent_ids=None): # Perform the full query and limit the results so that we don't get # overwhelmed - params.page(0, 100) # offset, limit + params.page(0, 500) # offset, limit columns = "fa.id, ofile.name" order_by = "ORDER BY ofile.name, fa.id DESC" query = self.FILES_BY_OBJECT_QUERY.format( From 6312c4c30792da8ab623514ceeb6ede4b1e0a677 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Tue, 12 Dec 2023 10:31:53 +0100 Subject: [PATCH 5/8] Add note to dialog when only showing limited number of files --- .../templates/webclient/annotations/files_form.html | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/omeroweb/webclient/templates/webclient/annotations/files_form.html b/omeroweb/webclient/templates/webclient/annotations/files_form.html index 07c0e8eff2..426ea35f26 100644 --- a/omeroweb/webclient/templates/webclient/annotations/files_form.html +++ b/omeroweb/webclient/templates/webclient/annotations/files_form.html @@ -60,7 +60,10 @@

{% trans "Upload a new file" %}:

And/or attach an existing File:

- Select the attachments to add (total: {{ total_files }}) + + Select the attachments to add + (total: {{ total_files }}{% if total_files > 500 %}, showing 500{% endif %}) +
{{ form_file.files }}
From a806cc6fd3926ecef29dc18adbdbc2fa1411eb88 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Tue, 12 Dec 2023 13:45:58 +0100 Subject: [PATCH 6/8] Add null check to match previous query --- omeroweb/webclient/controller/container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/omeroweb/webclient/controller/container.py b/omeroweb/webclient/controller/container.py index d421153024..19909a5554 100644 --- a/omeroweb/webclient/controller/container.py +++ b/omeroweb/webclient/controller/container.py @@ -539,7 +539,7 @@ def getFileName(self): " SELECT 1 FROM {parent_type}AnnotationLink sa_link " " WHERE sa_link.parent.id in (:ids) " " AND fa.id = sa_link.child.id " - " AND fa.ns not in (:ns_to_exclude) " + " AND (fa.ns not in (:ns_to_exclude) OR fa.ns IS NULL)" " {owned_by_me} " " GROUP BY sa_link.child.id " " HAVING count(sa_link.id) >= :count " From bdbfed63c7eacf9304aebdda64e5ec8613fabc86 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Thu, 14 Dec 2023 10:22:52 +0100 Subject: [PATCH 7/8] Don't repeat max files in different places --- omeroweb/webclient/controller/container.py | 4 ++-- .../webclient/annotations/files_form.html | 2 +- omeroweb/webclient/views.py | 18 +++++++++++++++--- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/omeroweb/webclient/controller/container.py b/omeroweb/webclient/controller/container.py index 19909a5554..16165b8e28 100644 --- a/omeroweb/webclient/controller/container.py +++ b/omeroweb/webclient/controller/container.py @@ -547,7 +547,7 @@ def getFileName(self): " {order_by}" ) - def getFilesByObject(self, parent_type=None, parent_ids=None): + def getFilesByObject(self, parent_type=None, parent_ids=None, offset=0, limit=100): me = ( (not self.canUseOthersAnns()) and self.conn.getEventContext().userId or None ) @@ -609,7 +609,7 @@ def getFilesByObject(self, parent_type=None, parent_ids=None): # Perform the full query and limit the results so that we don't get # overwhelmed - params.page(0, 500) # offset, limit + params.page(offset, limit) columns = "fa.id, ofile.name" order_by = "ORDER BY ofile.name, fa.id DESC" query = self.FILES_BY_OBJECT_QUERY.format( diff --git a/omeroweb/webclient/templates/webclient/annotations/files_form.html b/omeroweb/webclient/templates/webclient/annotations/files_form.html index 426ea35f26..08f016658c 100644 --- a/omeroweb/webclient/templates/webclient/annotations/files_form.html +++ b/omeroweb/webclient/templates/webclient/annotations/files_form.html @@ -62,7 +62,7 @@

{% trans "Upload a new file" %}:

And/or attach an existing File:

Select the attachments to add - (total: {{ total_files }}{% if total_files > 500 %}, showing 500{% endif %}) + (total: {{ total_files }}{% if total_files > max_files %}, showing {{ max_files }}{% endif %})
{{ form_file.files }}
diff --git a/omeroweb/webclient/views.py b/omeroweb/webclient/views.py index e99a3127a4..f35eb74926 100755 --- a/omeroweb/webclient/views.py +++ b/omeroweb/webclient/views.py @@ -2318,6 +2318,8 @@ def batch_annotate(request, conn=None, **kwargs): return context +MAX_FILES_IN_FILE_ANNOTATION_DIALOG = 500 + @login_required() @render_response() def annotate_file(request, conn=None, **kwargs): @@ -2385,7 +2387,10 @@ def annotate_file(request, conn=None, **kwargs): return handlerInternalError(request, x) if manager is not None: - total_files, files = manager.getFilesByObject() + total_files, files = manager.getFilesByObject( + offset=0, + limit=MAX_FILES_IN_FILE_ANNOTATION_DIALOG, + ) else: manager = BaseContainer(conn) for dtype, objs in oids.items(): @@ -2393,7 +2398,10 @@ def annotate_file(request, conn=None, **kwargs): # NB: we only support a single data-type now. E.g. 'image' OR # 'dataset' etc. total_files, files = manager.getFilesByObject( - parent_type=dtype, parent_ids=[o.getId() for o in objs] + parent_type=dtype, + parent_ids=[o.getId() for o in objs], + offset=0, + limit=MAX_FILES_IN_FILE_ANNOTATION_DIALOG, ) break @@ -2421,7 +2429,11 @@ def annotate_file(request, conn=None, **kwargs): else: form_file = FilesAnnotationForm(initial=initial) - context = {"form_file": form_file, "total_files": total_files} + context = { + "form_file": form_file, + "total_files": total_files, + "max_files": MAX_FILES_IN_FILE_ANNOTATION_DIALOG, + } template = "webclient/annotations/files_form.html" context["template"] = template return context From 640b96002488147b58fb1d5488d87190708e6b49 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 14 Dec 2023 09:25:19 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- omeroweb/webclient/views.py | 1 + 1 file changed, 1 insertion(+) diff --git a/omeroweb/webclient/views.py b/omeroweb/webclient/views.py index f35eb74926..6931c71a1a 100755 --- a/omeroweb/webclient/views.py +++ b/omeroweb/webclient/views.py @@ -2320,6 +2320,7 @@ def batch_annotate(request, conn=None, **kwargs): MAX_FILES_IN_FILE_ANNOTATION_DIALOG = 500 + @login_required() @render_response() def annotate_file(request, conn=None, **kwargs):