Skip to content

Commit

Permalink
Merge pull request #554 from chris-allan/bitmask-optimization
Browse files Browse the repository at this point in the history
Optimize obj_id_bitmask by using its own query
  • Loading branch information
knabar authored Jul 19, 2024
2 parents fe5cea5 + 79c2305 commit f2e0adc
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 35 deletions.
77 changes: 45 additions & 32 deletions omeroweb/webgateway/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3192,42 +3192,55 @@ def obj_id_bitmask(request, fileid, conn=None, query=None, **kwargs):
else None
)

rsp_data = perform_table_query(
conn,
fileid,
query,
[col_name],
offset=offset,
limit=limit,
lazy=False,
check_max_rows=False,
)
if "error" in rsp_data:
return rsp_data
ctx = conn.createServiceOptsDict()
ctx.setOmeroGroup("-1")
sr = conn.getSharedResources()
table = sr.openTable(omero.model.OriginalFileI(fileid, False), ctx)
if not table:
return {"error": "Table %s not found" % fileid}
try:
data = rowsToByteArray(rsp_data["data"]["rows"])
return HttpResponse(bytes(data), content_type="application/octet-stream")
except ValueError:
logger.error("ValueError when getting obj_id_bitmask")
return {"error": "Specified column has invalid type"}
column_names = [column.name for column in table.getHeaders()]
if col_name not in column_names:
# Previous implementations used perform_table_query() which
# defaults to returning all columns if the requested column name
# is unknown. We would have then packed the first column. We
# mimic that here by only querying for the first column.
#
# FIXME: This is really weird behaviour, especially with this
# endpoint defaulting to using the "object" column, and likely
# deserves to be refactored and deprecated or changed
# accordingly.
col_name = column_names[0]
row_numbers = table.getWhereList(query, None, 0, 0, 1)
(column,) = table.slice([column_names.index(col_name)], row_numbers).columns
try:
return HttpResponse(
column_to_packed_bits(column), content_type="application/octet-stream"
)
except ValueError:
logger.error("ValueError when getting obj_id_bitmask")
return {"error": "Specified column has invalid type"}
except Exception:
logger.error("Error when getting obj_id_bitmask", exc_info=True)
return {"error", "Unexpected error getting obj_id_bitmask"}
finally:
table.close()


def rowsToByteArray(rows):
    """
    Convert table rows to a packed bit mask (row-based variant).

    The first cell of every row is coerced with ``int()`` and used as a bit
    index; each index present is set to 1.  Bits are packed big-endian, so
    index 0 is the most significant bit of the first byte.

    :param rows: Sequence of rows; only the first cell of each row is read.
    :return: ``bytearray`` holding the packed bits.  An empty ``rows``
        yields a single zero byte.
    :raises ValueError: If the first cell of the first row is a float, or a
        cell cannot be coerced to ``int``.
    """
    if len(rows) > 0 and isinstance(rows[0][0], float):
        raise ValueError("Cannot have ID of float")
    obj_ids = [int(row[0]) for row in rows]
    # The mask always has at least one slot, so an empty result still packs
    # to one zero byte.
    bit_array = numpy.zeros(max([0] + obj_ids) + 1, dtype="uint8")
    for obj_id in obj_ids:
        bit_array[obj_id] = 1
    return bytearray(numpy.packbits(bit_array, bitorder="big").tobytes())


def column_to_packed_bits(column):
    """
    Convert a column of integer values (strings will be coerced) to a bit mask
    where each value present will be set to 1.

    Bits are packed big-endian, so value 0 maps to the most significant bit
    of the first byte.

    :param column: Object exposing a ``values`` sequence of integer IDs.
    :return: ``bytes`` holding the packed bits.  An empty column yields a
        single zero byte, matching ``rowsToByteArray([])``.
    :raises ValueError: If the first value is a float, or the values cannot
        be converted to ``int64``.
    """
    values = column.values
    if len(values) > 0 and isinstance(values[0], float):
        raise ValueError("Cannot have ID of float")
    # Coerce strings to int64 if required. If we have values > 2**63 they
    # wouldn't work anyway so signed is okay here. Note that the
    # implementation does get weird if the indexes are negative values.
    indexes = numpy.array(values, dtype="int64")
    if indexes.size == 0:
        # ndarray.max() raises ValueError on an empty array, which callers
        # would misreport as an invalid column type.  A query that matches
        # no rows is legitimate, so return the empty mask instead.
        return b"\x00"
    bits = numpy.zeros(int(indexes.max()) + 1, dtype="uint8")
    bits[indexes] = 1
    return numpy.packbits(bits, bitorder="big").tobytes()


@login_required()
Expand Down
9 changes: 6 additions & 3 deletions test/unit/test_webgateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
# coding=utf-8

import os
import numpy
import pytest
from django.http import HttpResponseBadRequest

from omero.columns import LongColumnI
from omeroweb.webgateway.webgateway_tempfile import WebGatewayTempFile
from omeroweb.webgateway import views
import omero.gateway
Expand Down Expand Up @@ -120,9 +122,10 @@ def testFilenameSize(self):


class TestViews(object):
def testRowstoByteArray(self):
    """Row-based packing sets exactly the bits named by each row's ID."""
    packed = views.rowsToByteArray([[1], [2], [7], [11], [12]])
    assert packed[0] == 97  # 01100001 First, Second and 7th bits
    assert packed[1] == 24  # 00011000 11th and 12th bits

def testColumnToPackedBits(self):
    """Column-based packing sets exactly the bits named by the values."""
    id_column = LongColumnI("test")
    id_column.values = [1, 2, 7, 11, 12]
    raw = views.column_to_packed_bits(id_column)
    packed = numpy.frombuffer(raw, dtype="uint8")
    assert packed[0] == 97  # 01100001 First, Second and 7th bits
    assert packed[1] == 24  # 00011000 11th and 12th bits

Expand Down

0 comments on commit f2e0adc

Please sign in to comment.