From a91243a5d1a20afb74a843511bff3becf6661db7 Mon Sep 17 00:00:00 2001 From: frcroth Date: Mon, 25 Nov 2024 15:01:07 +0100 Subject: [PATCH 1/9] Explore neuroglancer precomputed meshes for list Mesh route --- .../controllers/DSMeshController.scala | 6 +- .../datastore/services/MeshFileService.scala | 65 ++++++++++++++++++- 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala index f2f4d5921c0..4d763ed7c26 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala @@ -32,7 +32,11 @@ class DSMeshController @Inject()( urlOrHeaderToken(token, request)) { for { meshFiles <- meshFileService.exploreMeshFiles(organizationId, datasetName, dataLayerName) - } yield Ok(Json.toJson(meshFiles)) + neuroglancerMeshFiles <- meshFileService.exploreNeuroglancerPrecomputedMeshes(organizationId, + datasetName, + dataLayerName) + allMeshFiles = meshFiles ++ neuroglancerMeshFiles + } yield Ok(Json.toJson(allMeshFiles)) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala index c8f1809279e..b5293919e67 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala @@ -4,17 +4,33 @@ import com.google.common.io.LittleEndianDataInputStream import com.scalableminds.util.geometry.{Vec3Float, Vec3Int} import com.scalableminds.util.io.PathUtils import com.scalableminds.util.tools.JsonHelper.bool2Box -import com.scalableminds.util.tools.{ByteUtils, Fox, FoxImplicits} +import com.scalableminds.util.tools.{ByteUtils, Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.DataStoreConfig -import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} +import com.scalableminds.webknossos.datastore.datareaders.precomputed.ShardingSpecification +import com.scalableminds.webknossos.datastore.models.datasource.{ + Category, + DataFormat, + DataLayer, + DataLayerLike, + DataLayerWithMagLocators, + GenericDataSource +} +import com.scalableminds.webknossos.datastore.storage.{ + CachedHdf5File, + DataVaultService, + Hdf5FileCache, + RemoteSourceDescriptor +} import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box import net.liftweb.common.Box.tryo import org.apache.commons.io.FilenameUtils +import org.apache.pekko.http.scaladsl.model.Uri import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.{Json, OFormat} import java.io.ByteArrayInputStream +import java.net.URI import java.nio.file.{Path, Paths} import javax.inject.Inject import scala.collection.mutable.ListBuffer @@ -57,6 +73,17 @@ object MeshFileInfo { implicit val jsonFormat: OFormat[MeshFileInfo] = Json.format[MeshFileInfo] } +case class NeuroglancerPrecomputedMeshInfo( + lod_scale_multiplier: Double, + transform: Array[Double], + sharding: Option[ShardingSpecification], + vertex_quantization_bits: Int, +) + +object NeuroglancerPrecomputedMeshInfo { + implicit val jsonFormat: OFormat[NeuroglancerPrecomputedMeshInfo] = 
Json.format[NeuroglancerPrecomputedMeshInfo] +} + case class NeuroglancerSegmentManifest(chunkShape: Vec3Float, gridOrigin: Vec3Float, numLods: Int, @@ -171,7 +198,8 @@ object WebknossosSegmentInfo { } -class MeshFileService @Inject()(config: DataStoreConfig)(implicit ec: ExecutionContext) +class MeshFileService @Inject()(config: DataStoreConfig, dataVaultService: DataVaultService)( + implicit ec: ExecutionContext) extends FoxImplicits with LazyLogging with Hdf5HashedArrayUtils @@ -210,6 +238,37 @@ class MeshFileService @Inject()(config: DataStoreConfig)(implicit ec: ExecutionC } yield zipped.map(MeshFileInfo(_, _, _)).toSet } + def exploreNeuroglancerPrecomputedMeshes(organizationId: String, + datasetName: String, + dataLayerName: String): Fox[Set[MeshFileInfo]] = { + def exploreMeshesForLayer(dataLayer: DataLayer): Fox[NeuroglancerPrecomputedMeshInfo] = + for { + _ <- Fox.successful(()) + dataLayerWithMagLocators <- tryo(dataLayer.asInstanceOf[DataLayerWithMagLocators]).toFox + firstMag <- dataLayerWithMagLocators.mags.headOption.toFox ?~> "No mags found" + magPath <- firstMag.path.toFox ?~> "Mag has no path" + remotePath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(magPath), None)) + // We are assuming that meshes will be placed in /mesh directory. To be precise, we would first need to check the root info file. + meshDirectory = remotePath.parent / "mesh" + meshInfo = meshDirectory / "info" + meshInfo <- meshInfo.parseAsJson[NeuroglancerPrecomputedMeshInfo] ?~> "Failed to read mesh info" + } yield meshInfo + + def isDataLayerValid(d: DataLayer) = + d.name == dataLayerName && d.category == Category.segmentation && d.dataFormat == DataFormat.neuroglancerPrecomputed + + val datasetDir = dataBaseDir.resolve(organizationId).resolve(datasetName) + val datasetPropertiesFile = datasetDir.resolve("datasource-properties.json") + for { + datasetProperties <- JsonHelper + .validatedJsonFromFile[GenericDataSource[DataLayer]](datasetPropertiesFile, datasetDir) + .toFox + // meshInfos: Seq[(DataLayer, Fox[NeuroglancerPrecomputedMeshInfo])] = datasetProperties.dataLayers.filter(isDataLayerValid).map(d => (d, exploreMeshesForLayer(d))) + meshInfos = datasetProperties.dataLayers.filter(isDataLayerValid).map(exploreMeshesForLayer) + meshInfosResolved: List[NeuroglancerPrecomputedMeshInfo] <- Fox.sequenceOfFulls(meshInfos).toFox + } yield meshInfosResolved.map(_ => MeshFileInfo("mesh", None, 7)).toSet + } + /* Note that null is a valid value here for once. 
Meshfiles with no information about the meshFilePath will return Fox.empty, while meshfiles with one marked as empty, will return Fox.successful(null) From db6afec68f628ed6e80f772da77770023baaeea6 Mon Sep 17 00:00:00 2001 From: frcroth Date: Wed, 27 Nov 2024 14:07:53 +0100 Subject: [PATCH 2/9] Implement mesh chunk listing (does not work) [skip ci] --- .../controllers/DSMeshController.scala | 13 +- .../datareaders/precomputed/MurmurHash3.scala | 210 ++++++++++++++++++ ...NeuroglancerPrecomputedShardingUtils.scala | 133 +++++++++++ .../precomputed/PrecomputedArray.scala | 181 +-------------- .../precomputed/PrecomputedHeader.scala | 59 ++++- .../datastore/services/MeshFileService.scala | 82 +++++-- .../datastore/services/NeuroglancerMesh.scala | 8 + 7 files changed, 490 insertions(+), 196 deletions(-) create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/MurmurHash3.scala create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/NeuroglancerPrecomputedShardingUtils.scala create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/NeuroglancerMesh.scala diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala index 4d763ed7c26..c1cd5370ccc 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala @@ -72,11 +72,14 @@ class DSMeshController @Inject()( omitMissing = false, urlOrHeaderToken(token, request) ) - chunkInfos <- meshFileService.listMeshChunksForSegmentsMerged(organizationId, - datasetName, - dataLayerName, - request.body.meshFile, - segmentIds) + chunkInfos <- request.body.meshFileType match { + case Some("neuroglancerPrecomputed") => meshFileService.listMeshChunksForNeuroglancerPrecomputedMesh(request.body.meshFilePath, request.body.segmentId) + case _ => meshFileService.listMeshChunksForSegmentsMerged(organizationId, + datasetName, + dataLayerName, + request.body.meshFile, + segmentIds) + } } yield Ok(Json.toJson(chunkInfos)) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/MurmurHash3.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/MurmurHash3.scala new file mode 100644 index 00000000000..b9bd1a6d8b1 --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/MurmurHash3.scala @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2012 The Regents of The University California. + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.scalableminds.webknossos.datastore.datareaders.precomputed + +import java.lang.Integer.{ rotateLeft => rotl } + +/** + *

The MurmurHash3_x86_128(...) is a fast, non-cryptographic, 128-bit hash + * function that has excellent avalanche and 2-way bit independence properties. + * + * The C++ version, revision 147, of MurmurHash3, written by Austin Appleby + * and placed in the Public Domain, was the inspiration for this + * implementation in Scala. The C++ version can be found at + * SMHasher & MurmurHash. + * + * The Scala implementation follows the C++ version closely with two additional features + * tailored for scenarios where object allocation is expensive, e.g. where the hash function + * is called several million times. + * Use the method hash(data, seed, length) if you would like to reuse the same input buffer. + * Likewise, use the method hash(data, seed, length, results) if you would like to reuse + * the output buffer, which is always of fixed length 4. + * + * @author Ram Sriharsha (harshars at yahoo-inc dot com)

+ */ + +sealed class HashState(var h1: Int, var h2: Int, var h3: Int, var h4: Int) { + + val C1 = 0x239b961b + val C2 = 0xab0e9789 + val C3 = 0x38b34ae5 + val C4 = 0xa1e38b93 + + @inline final def blockMix(k1: Int, k2: Int, k3: Int, k4: Int) { + h1 ^= selfMixK1(k1) + h1 = rotl(h1, 19); h1 += h2; h1 = h1 * 5 + 0x561ccd1b + h2 ^= selfMixK2(k2) + h2 = rotl(h2, 17); h2 += h3; h2 = h2 * 5 + 0x0bcaa747 + h3 ^= selfMixK3(k3) + h3 = rotl(h3, 15); h3 += h4; h3 = h3 * 5 + 0x96cd1c35 + h4 ^= selfMixK4(k4) + h4 = rotl(h4, 13); h4 += h1; h4 = h4 * 5 + 0x32ac3b17 + } + + @inline final def finalMix(k1: Int, k2: Int, k3: Int, k4: Int, len: Int) { + h1 ^= (if (k1 ==0) 0 else selfMixK1(k1)) + h2 ^= (if (k2 ==0) 0 else selfMixK2(k2)) + h3 ^= (if (k3 ==0) 0 else selfMixK3(k3)) + h4 ^= (if (k4 ==0) 0 else selfMixK4(k4)) + h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len + + h1 += h2; h1 += h3; h1 += h4 + h2 += h1; h3 += h1; h4 += h1 + + h1 = fmix(h1) + h2 = fmix(h2) + h3 = fmix(h3) + h4 = fmix(h4) + + h1 += h2; h1 += h3; h1 += h4 + h2 += h1; h3 += h1; h4 += h1 + } + + @inline final def fmix(hash: Int): Int = { + var h = hash + h ^= h >> 16 + h *= 0x85ebca6b + h ^= h >> 13 + h *= 0xc2b2ae35 + h ^= h >> 16 + h + } + + @inline final def selfMixK1(k: Int): Int = { + var k1 = k; k1 *= C1; k1 = rotl(k1, 15); k1 *= C2 + k1 + } + + @inline final def selfMixK2(k: Int): Int = { + var k2 = k; k2 *= C2; k2 = rotl(k2, 16); k2 *= C3 + k2 + } + + @inline final def selfMixK3(k: Int): Int = { + var k3 = k; k3 *= C3; k3 = rotl(k3, 17); k3 *= C4 + k3 + } + + @inline final def selfMixK4(k: Int): Int = { + var k4 = k; k4 *= C4; k4 = rotl(k4, 18); k4 *= C1 + k4 + } +} + +object MurmurHash3_x86_128 { + + /** + * @param data is the bytes to be hashed. + * @param seed is the seed for the murmurhash algorithm. + */ + @inline final def hash(data: Array[Byte], seed: Int) + : Array[Int] = { + hash(data, seed, data.length) + } + + /** + * An optimization for reusing memory under large number of hash calls. + * @param data is the bytes to be hashed. + * @param seed is the seed for the murmurhash algorithm. + * @param length is the length of the buffer to use for hashing. + * @param results is the output buffer to store the four ints that are returned, + * should have size at least 4. + */ + @inline final def hash(data: Array[Byte], seed: Int, length: Int, + results: Array[Int]): Unit = { + var i = 0 + val blocks = length >> 4 + val state = new HashState(seed, seed, seed, seed) + while (i < blocks) { + val k1 = getInt(data, 4*i, 4) + val k2 = getInt(data, 4*i + 4, 4) + val k3 = getInt(data, 4*i + 8, 4) + val k4 = getInt(data, 4*i + 12, 4) + state.blockMix(k1, k2, k3, k4) + i += 1 + } + var k1, k2, k3, k4 = 0 + val tail = blocks * 16 + val rem = length - tail + // atmost 15 bytes remain + rem match { + case 12 | 13 | 14 | 15 => { + k1 = getInt(data, tail, 4) + k2 = getInt(data, tail + 4, 4) + k3 = getInt(data, tail + 8, 4) + k4 = getInt(data, tail + 12, rem - 12) + } + case 8 | 9 | 10 | 11 => { + k1 = getInt(data, tail, 4) + k2 = getInt(data, tail + 4, 4) + k3 = getInt(data, tail + 8, rem - 8) + } + case 4 | 5 | 6 | 7 => { + k1 = getInt(data, tail, 4) + k2 = getInt(data, tail + 4, rem - 4) + } + case 0 | 1 | 2 | 3 => { + k1 = getInt(data, tail, rem) + } + } + state.finalMix(k1, k2, k3, k4, length) + results(0) = state.h1 + results(1) = state.h2 + results(2) = state.h3 + results(3) = state.h4 + } + + /** + * An optimization for reusing memory under large number of hash calls. + * @param data is the bytes to be hashed. 
+ * @param seed is the seed for the murmurhash algorithm. + * @param length is the length of the buffer to use for hashing. + * @return is an array of size 4 that holds the four ints that comprise the 128 bit hash. + */ + @inline final def hash(data: Array[Byte], seed: Int, length: Int) + : Array[Int] = { + val results = new Array[Int](4) + hash(data, seed, length, results) + results + } + + /** + * Utility function to convert a byte array into an int, filling in zeros + * if the byte array is not big enough. + * @param data is the byte array to be converted to an int. + * @param index is the starting index in the byte array. + * @param rem is the remainder of the byte array to examine. + */ + @inline final def getInt(data: Array[Byte], index: Int, rem: Int): Int = { + rem match { + case 3 => data(index) << 24 | + (data(index + 1) & 0xFF) << 16 | + (data(index + 2) & 0xFF) << 8 + case 2 => data(index) << 24 | + (data(index + 1) & 0xFF) << 16 + case 1 => data(index) << 24 + case 0 => 0 + case _ => data(index) << 24 | + (data(index + 1) & 0xFF) << 16 | + (data(index + 2) & 0xFF) << 8 | + (data(index + 3) & 0xFF) + } + } +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/NeuroglancerPrecomputedShardingUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/NeuroglancerPrecomputedShardingUtils.scala new file mode 100644 index 00000000000..32a57cacc75 --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/NeuroglancerPrecomputedShardingUtils.scala @@ -0,0 +1,133 @@ +package com.scalableminds.webknossos.datastore.datareaders.precomputed + +import com.scalableminds.util.cache.AlfuCache +import com.scalableminds.util.io.ZipIO +import com.scalableminds.util.tools.Fox +import com.scalableminds.webknossos.datastore.datavault.VaultPath +import net.liftweb.common.Box +import net.liftweb.common.Box.tryo + +import java.nio.{ByteBuffer, ByteOrder} +import scala.collection.immutable.NumericRange +import scala.concurrent.ExecutionContext + +trait NeuroglancerPrecomputedShardingUtils { + + // SHARDING + // Implemented according to https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/sharded.md, + // directly adapted from https://github.com/scalableminds/webknossos-connect/blob/master/wkconnect/backends/neuroglancer/sharding.py. 
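+ // A worked example of the sharding arithmetic (all values illustrative, not mandated by the spec): + // with preshift_bits = 0, minishard_bits = 3 and shard_bits = 4, a hashed chunk id of 0b1010110 (86) + // selects minishard 86 & 0b111 = 6 and shard (86 & 0b1111000) >> 3 = 10, i.e. shard file "a.shard".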
+ + val shardingSpecification: ShardingSpecification + + private val minishardIndexCache: AlfuCache[(VaultPath, Int), Array[(Long, Long, Long)]] = + AlfuCache() + + private val shardIndexCache: AlfuCache[VaultPath, Array[Byte]] = + AlfuCache() + + private lazy val minishardCount = 1 << shardingSpecification.minishard_bits + + protected lazy val shardIndexRange: NumericRange.Exclusive[Long] = { + val end = minishardCount * 16 + Range.Long(0, end, 1) + } + + private def getShardIndex(shardPath: VaultPath)(implicit ec: ExecutionContext): Fox[Array[Byte]] = + shardIndexCache.getOrLoad(shardPath, readShardIndex) + + private def readShardIndex(shardPath: VaultPath)(implicit ec: ExecutionContext): Fox[Array[Byte]] = + shardPath.readBytes(Some(shardIndexRange)) + + private def parseShardIndex(index: Array[Byte]): Seq[(Long, Long)] = + // See https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/sharded.md#shard-index-format + index + .grouped(16) // 16 Bytes: 2 uint64 numbers: start_offset, end_offset + .map((bytes: Array[Byte]) => { + (BigInt(bytes.take(8).reverse).toLong, BigInt(bytes.slice(8, 16).reverse).toLong) // bytes reversed because they are stored little endian + }) + .toSeq + + private def getMinishardIndexRange(minishardNumber: Int, + parsedShardIndex: Seq[(Long, Long)]): NumericRange.Exclusive[Long] = { + val miniShardIndexStart: Long = (shardIndexRange.end) + parsedShardIndex(minishardNumber)._1 + val miniShardIndexEnd: Long = (shardIndexRange.end) + parsedShardIndex(minishardNumber)._2 + Range.Long(miniShardIndexStart, miniShardIndexEnd, 1) + } + + private def decodeMinishardIndex(bytes: Array[Byte]) = + shardingSpecification.minishard_index_encoding match { + case "gzip" => ZipIO.gunzip(bytes) + case _ => bytes + + } + + private def parseMinishardIndex(input: Array[Byte]): Box[Array[(Long, Long, Long)]] = tryo { + val bytes = decodeMinishardIndex(input) + /* + From: https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/sharded.md#minishard-index-format + The decoded "minishard index" is a binary string of 24*n bytes, specifying a contiguous C-order array of [3, n] + uint64le values. + */ + val n = bytes.length / 24 + val buf = ByteBuffer.allocate(bytes.length) + buf.put(bytes) + + val longArray = new Array[Long](n * 3) + buf.position(0) + buf.order(ByteOrder.LITTLE_ENDIAN) + buf.asLongBuffer().get(longArray) + // longArray is row major / C-order + /* + From: https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/sharded.md#minishard-index-format + Values array[0, 0], ..., array[0, n-1] specify the chunk IDs in the minishard, and are delta encoded, such that + array[0, 0] is equal to the ID of the first chunk, and the ID of chunk i is equal to the sum + of array[0, 0], ..., array[0, i]. + */ + val chunkIds = new Array[Long](n) + chunkIds(0) = longArray(0) + for (i <- 1 until n) { + chunkIds(i) = longArray(i) + chunkIds(i - 1) + } + /* + From: https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/sharded.md#minishard-index-format + The size of the data for chunk i is stored as array[2, i]. 
+ Values array[1, 0], ..., array[1, n-1] specify the starting offsets in the shard file of the data corresponding to + each chunk, and are also delta encoded relative to the end of the prior chunk, such that the starting offset of the + first chunk is equal to shard_index_end + array[1, 0], and the starting offset of chunk i is the sum of + shard_index_end + array[1, 0], ..., array[1, i] and array[2, 0], ..., array[2, i-1]. + */ + val chunkSizes = longArray.slice(2 * n, 3 * n) + val chunkStartOffsets = new Array[Long](n) + chunkStartOffsets(0) = longArray(n) + for (i <- 1 until n) { + val startOffsetIndex = i + n + chunkStartOffsets(i) = chunkStartOffsets(i - 1) + longArray(startOffsetIndex) + chunkSizes(i - 1) + } + + chunkIds.lazyZip(chunkStartOffsets).lazyZip(chunkSizes).toArray + } + + def getMinishardIndex(shardPath: VaultPath, minishardNumber: Int)( + implicit ec: ExecutionContext): Fox[Array[(Long, Long, Long)]] = + minishardIndexCache.getOrLoad((shardPath, minishardNumber), readMinishardIndex) + + private def readMinishardIndex(vaultPathAndMinishardNumber: (VaultPath, Int))( + implicit ec: ExecutionContext): Fox[Array[(Long, Long, Long)]] = { + val (vaultPath, minishardNumber) = vaultPathAndMinishardNumber + for { + index <- getShardIndex(vaultPath) + parsedIndex = parseShardIndex(index) + minishardIndexRange = getMinishardIndexRange(minishardNumber, parsedIndex) + indexRaw <- vaultPath.readBytes(Some(minishardIndexRange)) + minishardIndex <- parseMinishardIndex(indexRaw) + } yield minishardIndex + } + + def getChunkRange(chunkId: Long, minishardIndex: Array[(Long, Long, Long)])( + implicit ec: ExecutionContext): Fox[NumericRange.Exclusive[Long]] = + for { + chunkSpecification <- Fox.option2Fox(minishardIndex.find(_._1 == chunkId)) ?~> s"Could not find chunk id $chunkId in minishard index" + chunkStart = (shardIndexRange.end) + chunkSpecification._2 + chunkEnd = (shardIndexRange.end) + chunkSpecification._2 + chunkSpecification._3 + } yield Range.Long(chunkStart, chunkEnd, 1) +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedArray.scala index f7cc98ef8d1..c08944eed87 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedArray.scala @@ -1,7 +1,6 @@ package com.scalableminds.webknossos.datastore.datareaders.precomputed import com.scalableminds.util.cache.AlfuCache -import com.scalableminds.util.io.ZipIO import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.datareaders.{AxisOrder, DatasetArray} import com.scalableminds.webknossos.datastore.datavault.VaultPath @@ -10,12 +9,9 @@ import com.scalableminds.webknossos.datastore.models.datasource.AdditionalAxis import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box.tryo -import java.nio.ByteOrder -import java.nio.ByteBuffer import scala.collection.immutable.NumericRange import scala.concurrent.ExecutionContext import com.scalableminds.util.tools.Fox.{box2Fox, option2Fox} -import net.liftweb.common.Box import ucar.ma2.{Array => MultiArray} object PrecomputedArray extends LazyLogging { @@ -65,7 +61,8 @@ class PrecomputedArray(vaultPath: VaultPath, additionalAxes, sharedChunkContentsCache) with FoxImplicits - with 
LazyLogging { + with LazyLogging + with NeuroglancerPrecomputedShardingUtils { lazy val voxelOffset: Array[Int] = header.precomputedScale.voxel_offset.getOrElse(Array(0, 0, 0)) override protected def getChunkFilename(chunkIndex: Array[Int]): String = { @@ -78,183 +75,17 @@ class PrecomputedArray(vaultPath: VaultPath, .mkString(header.dimension_separator.toString) } - // SHARDING - // Implemented according to https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/sharded.md, - // directly adapted from https://github.com/scalableminds/webknossos-connect/blob/master/wkconnect/backends/neuroglancer/sharding.py. - - private val shardIndexCache: AlfuCache[VaultPath, Array[Byte]] = - AlfuCache() - - private val minishardIndexCache: AlfuCache[(VaultPath, Int), Array[(Long, Long, Long)]] = - AlfuCache() + val shardingSpecification: ShardingSpecification = + header.precomputedScale.sharding.getOrElse(ShardingSpecification.empty) private def getHashForChunk(chunkIndex: Array[Int]): Long = CompressedMortonCode.encode(chunkIndex, header.gridSize) - private lazy val minishardMask = { - header.precomputedScale.sharding match { - case Some(shardingSpec: ShardingSpecification) => - if (shardingSpec.minishard_bits == 0) { - 0 - } else { - var minishardMask = 1L - for (_ <- 0 until shardingSpec.minishard_bits - 1) { - minishardMask <<= 1 - minishardMask |= 1 - } - minishardMask - } - case None => 0 - } - } - - private lazy val shardMask = { - header.precomputedScale.sharding match { - case Some(shardingSpec: ShardingSpecification) => - val oneMask = Long.MinValue // 0xFFFFFFFFFFFFFFFF - val cursor = shardingSpec.minishard_bits + shardingSpec.shard_bits - val shardMask = ~((oneMask >> cursor) << cursor) - shardMask & (~minishardMask) - case None => 0 - } - } - - private lazy val minishardCount = 1 << header.precomputedScale.sharding.map(_.minishard_bits).getOrElse(0) - - private lazy val shardIndexRange: NumericRange.Exclusive[Long] = { - val end = minishardCount * 16 - Range.Long(0, end, 1) - } - - private def decodeMinishardIndex(bytes: Array[Byte]) = - header.precomputedScale.sharding match { - case Some(shardingSpec: ShardingSpecification) => - shardingSpec.minishard_index_encoding match { - case "gzip" => ZipIO.gunzip(bytes) - case _ => bytes - } - case _ => bytes - } - - private def getShardIndex(shardPath: VaultPath)(implicit ec: ExecutionContext): Fox[Array[Byte]] = - shardIndexCache.getOrLoad(shardPath, readShardIndex) - - private def readShardIndex(shardPath: VaultPath)(implicit ec: ExecutionContext): Fox[Array[Byte]] = - shardPath.readBytes(Some(shardIndexRange)) - - private def parseShardIndex(index: Array[Byte]): Seq[(Long, Long)] = - // See https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/sharded.md#shard-index-format - index - .grouped(16) // 16 Bytes: 2 uint64 numbers: start_offset, end_offset - .map((bytes: Array[Byte]) => { - (BigInt(bytes.take(8).reverse).toLong, BigInt(bytes.slice(8, 16).reverse).toLong) // bytes reversed because they are stored little endian - }) - .toSeq - - private def getMinishardInfo(chunkHash: Long): (Long, Long) = - header.precomputedScale.sharding match { - case Some(shardingSpec: ShardingSpecification) => - val rawChunkIdentifier = chunkHash >> shardingSpec.preshift_bits - val chunkIdentifier = shardingSpec.hashFunction(rawChunkIdentifier) - val minishardNumber = chunkIdentifier & minishardMask - val shardNumber = (chunkIdentifier & shardMask) >> shardingSpec.minishard_bits - 
(shardNumber, minishardNumber) - case None => (0, 0) - } - - private def getPathForShard(shardNumber: Long): VaultPath = { - val shardBits = header.precomputedScale.sharding.map(_.shard_bits.toFloat).getOrElse(0f) - if (shardBits == 0) { - vaultPath / "0.shard" - } else { - val shardString = String.format(s"%1$$${(shardBits / 4).ceil.toInt}s", shardNumber.toHexString).replace(' ', '0') - vaultPath / s"$shardString.shard" - } - - } - - private def getMinishardIndexRange(minishardNumber: Int, - parsedShardIndex: Seq[(Long, Long)]): NumericRange.Exclusive[Long] = { - val miniShardIndexStart: Long = (shardIndexRange.end) + parsedShardIndex(minishardNumber)._1 - val miniShardIndexEnd: Long = (shardIndexRange.end) + parsedShardIndex(minishardNumber)._2 - Range.Long(miniShardIndexStart, miniShardIndexEnd, 1) - } - - private def parseMinishardIndex(input: Array[Byte]): Box[Array[(Long, Long, Long)]] = tryo { - val bytes = decodeMinishardIndex(input) - /* - From: https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/sharded.md#minishard-index-format - The decoded "minishard index" is a binary string of 24*n bytes, specifying a contiguous C-order array of [3, n] - uint64le values. - */ - val n = bytes.length / 24 - val buf = ByteBuffer.allocate(bytes.length) - buf.put(bytes) - - val longArray = new Array[Long](n * 3) - buf.position(0) - buf.order(ByteOrder.LITTLE_ENDIAN) - buf.asLongBuffer().get(longArray) - // longArray is row major / C-order - /* - From: https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/sharded.md#minishard-index-format - Values array[0, 0], ..., array[0, n-1] specify the chunk IDs in the minishard, and are delta encoded, such that - array[0, 0] is equal to the ID of the first chunk, and the ID of chunk i is equal to the sum - of array[0, 0], ..., array[0, i]. - */ - val chunkIds = new Array[Long](n) - chunkIds(0) = longArray(0) - for (i <- 1 until n) { - chunkIds(i) = longArray(i) + chunkIds(i - 1) - } - /* - From: https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/sharded.md#minishard-index-format - The size of the data for chunk i is stored as array[2, i]. - Values array[1, 0], ..., array[1, n-1] specify the starting offsets in the shard file of the data corresponding to - each chunk, and are also delta encoded relative to the end of the prior chunk, such that the starting offset of the - first chunk is equal to shard_index_end + array[1, 0], and the starting offset of chunk i is the sum of - shard_index_end + array[1, 0], ..., array[1, i] and array[2, 0], ..., array[2, i-1]. 
- */ - val chunkSizes = longArray.slice(2 * n, 3 * n) - val chunkStartOffsets = new Array[Long](n) - chunkStartOffsets(0) = longArray(n) - for (i <- 1 until n) { - val startOffsetIndex = i + n - chunkStartOffsets(i) = chunkStartOffsets(i - 1) + longArray(startOffsetIndex) + chunkSizes(i - 1) - } - - chunkIds.lazyZip(chunkStartOffsets).lazyZip(chunkSizes).toArray - } - - private def getMinishardIndex(shardPath: VaultPath, minishardNumber: Int)( - implicit ec: ExecutionContext): Fox[Array[(Long, Long, Long)]] = - minishardIndexCache.getOrLoad((shardPath, minishardNumber), readMinishardIndex) - - private def readMinishardIndex(vaultPathAndMinishardNumber: (VaultPath, Int))( - implicit ec: ExecutionContext): Fox[Array[(Long, Long, Long)]] = { - val (vaultPath, minishardNumber) = vaultPathAndMinishardNumber - for { - index <- getShardIndex(vaultPath) - parsedIndex = parseShardIndex(index) - minishardIndexRange = getMinishardIndexRange(minishardNumber, parsedIndex) - indexRaw <- vaultPath.readBytes(Some(minishardIndexRange)) - minishardIndex <- parseMinishardIndex(indexRaw) - } yield minishardIndex - } - - private def getChunkRange(chunkId: Long, minishardIndex: Array[(Long, Long, Long)])( - implicit ec: ExecutionContext): Fox[NumericRange.Exclusive[Long]] = - for { - chunkSpecification <- Fox.option2Fox(minishardIndex.find(_._1 == chunkId)) ?~> s"Could not find chunk id $chunkId in minishard index" - chunkStart = (shardIndexRange.end) + chunkSpecification._2 - chunkEnd = (shardIndexRange.end) + chunkSpecification._2 + chunkSpecification._3 - } yield Range.Long(chunkStart, chunkEnd, 1) - override def getShardedChunkPathAndRange(chunkIndex: Array[Int])( implicit ec: ExecutionContext): Fox[(VaultPath, NumericRange[Long])] = { val chunkIdentifier = getHashForChunk(chunkIndex) - val minishardInfo = getMinishardInfo(chunkIdentifier) - val shardPath = getPathForShard(minishardInfo._1) + val minishardInfo = shardingSpecification.getMinishardInfo(chunkIdentifier) + val shardPath = shardingSpecification.getPathForShard(vaultPath, minishardInfo._1) for { minishardIndex <- getMinishardIndex(shardPath, minishardInfo._2.toInt) ?~> f"Could not get minishard index for chunkIndex ${chunkIndex .mkString(",")}" diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala index 8ac64cfb075..fcc9e9e01d0 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala @@ -1,10 +1,12 @@ package com.scalableminds.webknossos.datastore.datareaders.precomputed import com.scalableminds.util.geometry.Vec3Int +import com.scalableminds.util.tools.ByteUtils import com.scalableminds.webknossos.datastore.datareaders.ArrayDataType.ArrayDataType import com.scalableminds.webknossos.datastore.datareaders.ArrayOrder.ArrayOrder import com.scalableminds.webknossos.datastore.datareaders.DimensionSeparator.DimensionSeparator import com.scalableminds.webknossos.datastore.datareaders.{ArrayOrder, Compressor, DatasetHeader, DimensionSeparator} +import com.scalableminds.webknossos.datastore.datavault.VaultPath import com.scalableminds.webknossos.datastore.helpers.JsonImplicits import play.api.libs.json.{Format, JsResult, JsValue, Json} import play.api.libs.json.Json.WithDefaultValues @@ 
-79,11 +81,60 @@ case class ShardingSpecification(`@type`: String, minishard_bits: Int, shard_bits: Long, minishard_index_encoding: String = "raw", - data_encoding: String = "raw") { + data_encoding: String = "raw") + extends ByteUtils { def hashFunction(input: Long): Long = - if (hash == "identity") input - else ??? // not implemented: murmurhash3_x86_128 + hash match { + case "identity" => input + case "murmurhash3_x86_128" => applyMurmurHash3(input) + case _ => throw new IllegalArgumentException(s"Unsupported hash function: $hash") + } + + private def applyMurmurHash3(input: Long): Long = { + // he MurmurHash3_x86_128 hash function applied to the shifted chunk ID in little endian encoding. The low 8 bytes of the resultant hash code are treated as a little endian 64-bit number. + val bytes = longToBytes(input) + val hash = MurmurHash3_x86_128.hash(bytes, 0) + val result = hash(0) & 0xFFFFFFFFL | (hash(1) & 0xFFFFFFFFL) << 32 + result + } + + private lazy val minishardMask = { + if (minishard_bits == 0) { + 0 + } else { + var minishardMask = 1L + for (_ <- 0 until minishard_bits - 1) { + minishardMask <<= 1 + minishardMask |= 1 + } + minishardMask + } + } + + private lazy val shardMask = { + val oneMask = Long.MinValue // 0xFFFFFFFFFFFFFFFF + val cursor = minishard_bits + shard_bits + val shardMask = ~((oneMask >> cursor) << cursor) + shardMask & (~minishardMask) + } + + def getMinishardInfo(chunkHash: Long): (Long, Long) = { + val rawChunkIdentifier = chunkHash >> preshift_bits + val chunkIdentifier = hashFunction(rawChunkIdentifier) + val minishardNumber = chunkIdentifier & minishardMask + val shardNumber = (chunkIdentifier & shardMask) >> minishard_bits + (shardNumber, minishardNumber) + } + + def getPathForShard(base: VaultPath, shardNumber: Long): VaultPath = + if (shard_bits == 0) { + base / "0.shard" + } else { + val shardString = String.format(s"%1$$${(shard_bits / 4).ceil.toInt}s", shardNumber.toHexString).replace(' ', '0') + base / s"$shardString.shard" + } + } object ShardingSpecification extends JsonImplicits { @@ -94,6 +145,8 @@ object ShardingSpecification extends JsonImplicits { override def writes(shardingSpecification: ShardingSpecification): JsValue = Json.writes[ShardingSpecification].writes(shardingSpecification) } + + def empty: ShardingSpecification = ShardingSpecification("neuroglancer_uint64_sharded_v1", 0, "identity", 0, 0) } object PrecomputedScale extends JsonImplicits { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala index b5293919e67..d01f4b15bc5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala @@ -1,17 +1,18 @@ package com.scalableminds.webknossos.datastore.services import com.google.common.io.LittleEndianDataInputStream +import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.{Vec3Float, Vec3Int} import com.scalableminds.util.io.PathUtils import com.scalableminds.util.tools.JsonHelper.bool2Box import com.scalableminds.util.tools.{ByteUtils, Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.datareaders.precomputed.ShardingSpecification +import com.scalableminds.webknossos.datastore.datavault.VaultPath import 
com.scalableminds.webknossos.datastore.models.datasource.{ Category, DataFormat, DataLayer, - DataLayerLike, DataLayerWithMagLocators, GenericDataSource } @@ -25,7 +26,6 @@ import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box import net.liftweb.common.Box.tryo import org.apache.commons.io.FilenameUtils -import org.apache.pekko.http.scaladsl.model.Uri import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.{Json, OFormat} @@ -38,6 +38,8 @@ import scala.concurrent.ExecutionContext case class ListMeshChunksRequest( meshFile: String, + meshFilePath: Option[String], + meshFileType: Option[String], segmentId: Long ) @@ -65,6 +67,8 @@ object MeshChunkDataRequestList { case class MeshFileInfo( meshFileName: String, + meshFilePath: Option[String], + meshFileType: Option[String], mappingName: Option[String], formatVersion: Long ) @@ -78,7 +82,9 @@ case class NeuroglancerPrecomputedMeshInfo( transform: Array[Double], sharding: Option[ShardingSpecification], vertex_quantization_bits: Int, -) +) { + def transform2DArray: Array[Array[Double]] = transform.grouped(4).toArray +} object NeuroglancerPrecomputedMeshInfo { implicit val jsonFormat: OFormat[NeuroglancerPrecomputedMeshInfo] = Json.format[NeuroglancerPrecomputedMeshInfo] @@ -235,13 +241,28 @@ class MeshFileService @Inject()(config: DataStoreConfig, dataVaultService: DataV mappingNameOptions = mappingNameBoxes.map(_.toOption) zipped = meshFileNames.lazyZip(mappingNameOptions).lazyZip(meshFileVersions) - } yield zipped.map(MeshFileInfo(_, _, _)).toSet + } yield + zipped + .map({ + case (fileName, mappingName, fileVersion) => + MeshFileInfo(fileName, None, Some("local"), mappingName, fileVersion) + }) + .toSet } + private lazy val neuroglancerPrecomputedMeshInfoCache = AlfuCache[VaultPath, NeuroglancerPrecomputedMeshInfo](100) + + private def loadRemoteMeshInfo(meshPath: VaultPath): Fox[NeuroglancerPrecomputedMeshInfo] = + for { + _ <- Fox.successful(()) + meshInfoPath = meshPath / "info" + meshInfo <- meshInfoPath.parseAsJson[NeuroglancerPrecomputedMeshInfo] ?~> "Failed to read mesh info" + } yield meshInfo + def exploreNeuroglancerPrecomputedMeshes(organizationId: String, datasetName: String, dataLayerName: String): Fox[Set[MeshFileInfo]] = { - def exploreMeshesForLayer(dataLayer: DataLayer): Fox[NeuroglancerPrecomputedMeshInfo] = + def exploreMeshesForLayer(dataLayer: DataLayer): Fox[(NeuroglancerPrecomputedMeshInfo, VaultPath)] = for { _ <- Fox.successful(()) dataLayerWithMagLocators <- tryo(dataLayer.asInstanceOf[DataLayerWithMagLocators]).toFox @@ -249,10 +270,9 @@ class MeshFileService @Inject()(config: DataStoreConfig, dataVaultService: DataV magPath <- firstMag.path.toFox ?~> "Mag has no path" remotePath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(magPath), None)) // We are assuming that meshes will be placed in /mesh directory. To be precise, we would first need to check the root info file. 
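// (The precomputed format's top-level info JSON usually names this directory via its "mesh" key, e.g. {"mesh": "mesh"}.)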
- meshDirectory = remotePath.parent / "mesh" - meshInfo = meshDirectory / "info" - meshInfo <- meshInfo.parseAsJson[NeuroglancerPrecomputedMeshInfo] ?~> "Failed to read mesh info" - } yield meshInfo + meshPath = remotePath.parent / "mesh" + meshInfo <- neuroglancerPrecomputedMeshInfoCache.getOrLoad(meshPath, loadRemoteMeshInfo) + } yield (meshInfo, meshPath) def isDataLayerValid(d: DataLayer) = d.name == dataLayerName && d.category == Category.segmentation && d.dataFormat == DataFormat.neuroglancerPrecomputed @@ -263,12 +283,29 @@ class MeshFileService @Inject()(config: DataStoreConfig, dataVaultService: DataV datasetProperties <- JsonHelper .validatedJsonFromFile[GenericDataSource[DataLayer]](datasetPropertiesFile, datasetDir) .toFox - // meshInfos: Seq[(DataLayer, Fox[NeuroglancerPrecomputedMeshInfo])] = datasetProperties.dataLayers.filter(isDataLayerValid).map(d => (d, exploreMeshesForLayer(d))) - meshInfos = datasetProperties.dataLayers.filter(isDataLayerValid).map(exploreMeshesForLayer) - meshInfosResolved: List[NeuroglancerPrecomputedMeshInfo] <- Fox.sequenceOfFulls(meshInfos).toFox - } yield meshInfosResolved.map(_ => MeshFileInfo("mesh", None, 7)).toSet + meshInfosAndInfoPaths = datasetProperties.dataLayers.filter(isDataLayerValid).map(exploreMeshesForLayer) + meshInfosResolved: List[(NeuroglancerPrecomputedMeshInfo, VaultPath)] <- Fox + .sequenceOfFulls(meshInfosAndInfoPaths) + .toFox + } yield + meshInfosResolved + .map({ + case (_, vaultPath) => + MeshFileInfo("mesh", Some(vaultPath.toString), Some("neuroglancerPrecomputed"), None, 7) + }) + .toSet } + // TODOs: + // - Move all neuroglancer precomputed mesh handling to different service? + // - Add enum for mesh types + // - ??? Maybe change API to have a mesh object instead of different keys for name, path, type + // - Support tokens? + // - Can some sharding stuff be unified between array and mesh? + // - Support non sharding meshes? + // - Tests? + // - Need to implement murmurhash3_x86_128 + /* Note that null is a valid value here for once. 
Meshfiles with no information about the meshFilePath will return Fox.empty, while meshfiles with one marked as empty, will return Fox.successful(null) @@ -434,6 +471,25 @@ class MeshFileService @Inject()(config: DataStoreConfig, dataVaultService: DataV (neuroglancerStart, neuroglancerEnd) } + def listMeshChunksForNeuroglancerPrecomputedMesh(meshFilePathOpt: Option[String], + segmentId: Long): Fox[WebknossosSegmentInfo] = + for { + meshFilePath <- meshFilePathOpt.toFox ?~> "No mesh file path provided" + vaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(meshFilePath), None)) + meshInfo <- neuroglancerPrecomputedMeshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) + mesh = NeuroglancerMesh(meshInfo) + minishardInfo = mesh.shardingSpecification.getMinishardInfo(segmentId) + shardUrl = mesh.shardingSpecification.getPathForShard(vaultPath, minishardInfo._1) + minishardIndex <- mesh.getMinishardIndex(shardUrl, minishardInfo._2.toInt) + chunkRange <- mesh.getChunkRange(segmentId, minishardIndex) + chunk <- shardUrl.readBytes(Some(chunkRange)) + segmentManifest = NeuroglancerSegmentManifest.fromBytes(chunk) + meshSegmentInfo = enrichSegmentInfo(segmentManifest, meshInfo.lod_scale_multiplier, chunkRange.start, segmentId) + transform = meshInfo.transform2DArray + encoding = "draco" + wkChunkInfos <- WebknossosSegmentInfo.fromMeshInfosAndMetadata(List(meshSegmentInfo), encoding, transform) + } yield wkChunkInfos + def readMeshChunk(organizationId: String, datasetName: String, dataLayerName: String, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/NeuroglancerMesh.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/NeuroglancerMesh.scala new file mode 100644 index 00000000000..60af07b1c0a --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/NeuroglancerMesh.scala @@ -0,0 +1,8 @@ +package com.scalableminds.webknossos.datastore.services + +import com.scalableminds.webknossos.datastore.datareaders.precomputed.{NeuroglancerPrecomputedShardingUtils, ShardingSpecification} + +case class NeuroglancerMesh(meshInfo: NeuroglancerPrecomputedMeshInfo) extends NeuroglancerPrecomputedShardingUtils{ + override val shardingSpecification: ShardingSpecification = meshInfo.sharding.get // TODO: Remove get + +} From 1de5f6c5856a4e6fb5842939c3bcd2c08945117d Mon Sep 17 00:00:00 2001 From: frcroth Date: Wed, 27 Nov 2024 14:20:19 +0100 Subject: [PATCH 3/9] Fix wrong shard path --- .../datastore/datareaders/precomputed/PrecomputedHeader.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala index fcc9e9e01d0..8e2a558982e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala @@ -131,7 +131,8 @@ case class ShardingSpecification(`@type`: String, if (shard_bits == 0) { base / "0.shard" } else { - val shardString = String.format(s"%1$$${(shard_bits / 4).ceil.toInt}s", shardNumber.toHexString).replace(' ', '0') + val shardString = + String.format(s"%1$$${(shard_bits.toFloat / 4).ceil.toInt}s", shardNumber.toHexString).replace(' ', '0') base / s"$shardString.shard" } From 
11dfa9136df758bf06110fb979282af953efaa52 Mon Sep 17 00:00:00 2001 From: frcroth Date: Mon, 2 Dec 2024 16:08:18 +0100 Subject: [PATCH 4/9] Add murmurhash_x86_128 hashing --- test/backend/MurmurHashTestSuite.scala | 19 ++ .../datareaders/precomputed/MurmurHash3.scala | 284 ++++++------------ ...NeuroglancerPrecomputedShardingUtils.scala | 12 +- .../precomputed/PrecomputedHeader.scala | 12 +- .../datastore/services/MeshFileService.scala | 5 +- .../datastore/services/NeuroglancerMesh.scala | 7 +- 6 files changed, 136 insertions(+), 203 deletions(-) create mode 100644 test/backend/MurmurHashTestSuite.scala diff --git a/test/backend/MurmurHashTestSuite.scala b/test/backend/MurmurHashTestSuite.scala new file mode 100644 index 00000000000..add2f0cfc30 --- /dev/null +++ b/test/backend/MurmurHashTestSuite.scala @@ -0,0 +1,19 @@ +package backend + +import com.scalableminds.webknossos.datastore.datareaders.precomputed.MurmurHash3 +import org.scalatestplus.play.PlaySpec + +class MurmurHashTestSuite extends PlaySpec { + + "Murmur hash" should { + "return the correct hash" in { + val keyString = "Hello World!" + val keyBytes = keyString.getBytes + val seed = 0 + val expectedHash = -1505357907696379773L + val actualHash = MurmurHash3.hash64(keyBytes, seed) + + assert(actualHash == expectedHash) + } + } +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/MurmurHash3.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/MurmurHash3.scala index b9bd1a6d8b1..f0363c2e057 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/MurmurHash3.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/MurmurHash3.scala @@ -1,210 +1,118 @@ -/* - * Copyright (C) 2012 The Regents of The University California. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - package com.scalableminds.webknossos.datastore.datareaders.precomputed -import java.lang.Integer.{ rotateLeft => rotl } - -/** - *

The MurmurHash3_x86_128(...) is a fast, non-cryptographic, 128-bit hash - * function that has excellent avalanche and 2-way bit independence properties. - * - * The C++ version, revision 147, of MurmurHash3, written by Austin Appleby - * and placed in the Public Domain, was the inspiration for this - * implementation in Scala. The C++ version can be found at - * SMHasher & MurmurHash. - * - * The Scala implementation follows the C++ version closely with two additional features - * tailored for scenarios where object allocation is expensive, e.g. where the hash function - * is called several million times. - * Use the method hash(data, seed, length) if you would like to reuse the same input buffer. - * Likewise, use the method hash(data, seed, length, results) if you would like to reuse - * the output buffer, which is always of fixed length 4. - * - * @author Ram Sriharsha (harshars at yahoo-inc dot com)

- */ - -sealed class HashState(var h1: Int, var h2: Int, var h3: Int, var h4: Int) { - - val C1 = 0x239b961b - val C2 = 0xab0e9789 - val C3 = 0x38b34ae5 - val C4 = 0xa1e38b93 - - @inline final def blockMix(k1: Int, k2: Int, k3: Int, k4: Int) { - h1 ^= selfMixK1(k1) - h1 = rotl(h1, 19); h1 += h2; h1 = h1 * 5 + 0x561ccd1b - h2 ^= selfMixK2(k2) - h2 = rotl(h2, 17); h2 += h3; h2 = h2 * 5 + 0x0bcaa747 - h3 ^= selfMixK3(k3) - h3 = rotl(h3, 15); h3 += h4; h3 = h3 * 5 + 0x96cd1c35 - h4 ^= selfMixK4(k4) - h4 = rotl(h4, 13); h4 += h1; h4 = h4 * 5 + 0x32ac3b17 +object MurmurHash3 { + + private def fmix(h: Int): Int = { + var hash = h + hash ^= (hash >>> 16) + hash = (hash * 0x85EBCA6B) & 0xFFFFFFFF + hash ^= (hash >>> 13) + hash = (hash * 0xC2B2AE35) & 0xFFFFFFFF + hash ^= (hash >>> 16) + hash } - @inline final def finalMix(k1: Int, k2: Int, k3: Int, k4: Int, len: Int) { - h1 ^= (if (k1 ==0) 0 else selfMixK1(k1)) - h2 ^= (if (k2 ==0) 0 else selfMixK2(k2)) - h3 ^= (if (k3 ==0) 0 else selfMixK3(k3)) - h4 ^= (if (k4 ==0) 0 else selfMixK4(k4)) - h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len + private def hash128(key: Array[Byte], seed: Int): BigInt = { + val c1 = 0x239B961B + val c2 = 0xAB0E9789 + val c3 = 0x38B34AE5 + val c4 = 0xA1E38B93 - h1 += h2; h1 += h3; h1 += h4 - h2 += h1; h3 += h1; h4 += h1 + val length = key.length + val nblocks = length / 16 - h1 = fmix(h1) - h2 = fmix(h2) - h3 = fmix(h3) - h4 = fmix(h4) + var h1 = seed + var h2 = seed + var h3 = seed + var h4 = seed - h1 += h2; h1 += h3; h1 += h4 - h2 += h1; h3 += h1; h4 += h1 - } + // Process blocks + for (i <- 0 until nblocks) { + val block = key.slice(i * 16, i * 16 + 16) + val k1 = BigInt(block.slice(0, 4).reverse).toInt + val k2 = BigInt(block.slice(4, 8).reverse).toInt + val k3 = BigInt(block.slice(8, 12).reverse).toInt + val k4 = BigInt(block.slice(12, 16).reverse).toInt - @inline final def fmix(hash: Int): Int = { - var h = hash - h ^= h >> 16 - h *= 0x85ebca6b - h ^= h >> 13 - h *= 0xc2b2ae35 - h ^= h >> 16 - h - } + h1 ^= Integer.rotateLeft((k1 * c1) & 0xFFFFFFFF, 15) * c2 & 0xFFFFFFFF + h1 = (Integer.rotateLeft(h1, 19) + h2) * 5 + 0x561CCD1B & 0xFFFFFFFF - @inline final def selfMixK1(k: Int): Int = { - var k1 = k; k1 *= C1; k1 = rotl(k1, 15); k1 *= C2 - k1 - } + h2 ^= Integer.rotateLeft((k2 * c2) & 0xFFFFFFFF, 16) * c3 & 0xFFFFFFFF + h2 = (Integer.rotateLeft(h2, 17) + h3) * 5 + 0x0BCAA747 & 0xFFFFFFFF - @inline final def selfMixK2(k: Int): Int = { - var k2 = k; k2 *= C2; k2 = rotl(k2, 16); k2 *= C3 - k2 - } + h3 ^= Integer.rotateLeft((k3 * c3) & 0xFFFFFFFF, 17) * c4 & 0xFFFFFFFF + h3 = (Integer.rotateLeft(h3, 15) + h4) * 5 + 0x96CD1C35 & 0xFFFFFFFF - @inline final def selfMixK3(k: Int): Int = { - var k3 = k; k3 *= C3; k3 = rotl(k3, 17); k3 *= C4 - k3 - } + h4 ^= Integer.rotateLeft((k4 * c4) & 0xFFFFFFFF, 18) * c1 & 0xFFFFFFFF + h4 = (Integer.rotateLeft(h4, 13) + h1) * 5 + 0x32AC3B17 & 0xFFFFFFFF + } - @inline final def selfMixK4(k: Int): Int = { - var k4 = k; k4 *= C4; k4 = rotl(k4, 18); k4 *= C1 - k4 - } -} + // Tail + val tail = key.slice(nblocks * 16, length) + var k1, k2, k3, k4 = 0 + + tail.zipWithIndex.foreach { + case (byte, i) => + val shift = (i % 4) * 8 + i / 4 match { + case 0 => k1 |= (byte & 0xFF) << shift + case 1 => k2 |= (byte & 0xFF) << shift + case 2 => k3 |= (byte & 0xFF) << shift + case 3 => k4 |= (byte & 0xFF) << shift + } + } -object MurmurHash3_x86_128 { + if (tail.length > 0) { + k1 = (k1 * c1) & 0xFFFFFFFF + k1 = Integer.rotateLeft(k1, 15) * c2 & 0xFFFFFFFF + h1 ^= k1 + } - /** - * @param data is the bytes to 
be hashed. - * @param seed is the seed for the murmurhash algorithm. - */ - @inline final def hash(data: Array[Byte], seed: Int) - : Array[Int] = { - hash(data, seed, data.length) - } + if (tail.length > 4) { + k2 = (k2 * c2) & 0xFFFFFFFF + k2 = Integer.rotateLeft(k2, 16) * c3 & 0xFFFFFFFF + h2 ^= k2 + } - /** - * An optimization for reusing memory under large number of hash calls. - * @param data is the bytes to be hashed. - * @param seed is the seed for the murmurhash algorithm. - * @param length is the length of the buffer to use for hashing. - * @param results is the output buffer to store the four ints that are returned, - * should have size at least 4. - */ - @inline final def hash(data: Array[Byte], seed: Int, length: Int, - results: Array[Int]): Unit = { - var i = 0 - val blocks = length >> 4 - val state = new HashState(seed, seed, seed, seed) - while (i < blocks) { - val k1 = getInt(data, 4*i, 4) - val k2 = getInt(data, 4*i + 4, 4) - val k3 = getInt(data, 4*i + 8, 4) - val k4 = getInt(data, 4*i + 12, 4) - state.blockMix(k1, k2, k3, k4) - i += 1 + if (tail.length > 8) { + k3 = (k3 * c3) & 0xFFFFFFFF + k3 = Integer.rotateLeft(k3, 17) * c4 & 0xFFFFFFFF + h3 ^= k3 } - var k1, k2, k3, k4 = 0 - val tail = blocks * 16 - val rem = length - tail - // atmost 15 bytes remain - rem match { - case 12 | 13 | 14 | 15 => { - k1 = getInt(data, tail, 4) - k2 = getInt(data, tail + 4, 4) - k3 = getInt(data, tail + 8, 4) - k4 = getInt(data, tail + 12, rem - 12) - } - case 8 | 9 | 10 | 11 => { - k1 = getInt(data, tail, 4) - k2 = getInt(data, tail + 4, 4) - k3 = getInt(data, tail + 8, rem - 8) - } - case 4 | 5 | 6 | 7 => { - k1 = getInt(data, tail, 4) - k2 = getInt(data, tail + 4, rem - 4) - } - case 0 | 1 | 2 | 3 => { - k1 = getInt(data, tail, rem) - } + + if (tail.length > 12) { + k4 = (k4 * c4) & 0xFFFFFFFF + k4 = Integer.rotateLeft(k4, 18) * c1 & 0xFFFFFFFF + h4 ^= k4 } - state.finalMix(k1, k2, k3, k4, length) - results(0) = state.h1 - results(1) = state.h2 - results(2) = state.h3 - results(3) = state.h4 - } - /** - * An optimization for reusing memory under large number of hash calls. - * @param data is the bytes to be hashed. - * @param seed is the seed for the murmurhash algorithm. - * @param length is the length of the buffer to use for hashing. - * @return is an array of size 4 that holds the four ints that comprise the 128 bit hash. - */ - @inline final def hash(data: Array[Byte], seed: Int, length: Int) - : Array[Int] = { - val results = new Array[Int](4) - hash(data, seed, length, results) - results + // Finalization + h1 ^= length + h2 ^= length + h3 ^= length + h4 ^= length + + h1 = (h1 + h2 + h3 + h4) & 0xFFFFFFFF + h2 = (h1 + h2) & 0xFFFFFFFF + h3 = (h1 + h3) & 0xFFFFFFFF + h4 = (h1 + h4) & 0xFFFFFFFF + + h1 = fmix(h1) + h2 = fmix(h2) + h3 = fmix(h3) + h4 = fmix(h4) + + h1 = (h1 + h2 + h3 + h4) & 0xFFFFFFFF + h2 = (h1 + h2) & 0xFFFFFFFF + h3 = (h1 + h3) & 0xFFFFFFFF + h4 = (h1 + h4) & 0xFFFFFFFF + + BigInt(h4) << 96 | BigInt(h3) << 64 | BigInt(h2) << 32 | BigInt(h1) } - /** - * Utility function to convert a byte array into an int, filling in zeros - * if the byte array is not big enough. - * @param data is the byte array to be converted to an int. - * @param index is the starting index in the byte array. - * @param rem is the remainder of the byte array to examine. 
- */ - @inline final def getInt(data: Array[Byte], index: Int, rem: Int): Int = { - rem match { - case 3 => data(index) << 24 | - (data(index + 1) & 0xFF) << 16 | - (data(index + 2) & 0xFF) << 8 - case 2 => data(index) << 24 | - (data(index + 1) & 0xFF) << 16 - case 1 => data(index) << 24 - case 0 => 0 - case _ => data(index) << 24 | - (data(index + 1) & 0xFF) << 16 | - (data(index + 2) & 0xFF) << 8 | - (data(index + 3) & 0xFF) - } + def hash64(key: Array[Byte], seed: Int = 0): Long = { + val hash128 = MurmurHash3.hash128(key, seed) + val low = (hash128 & BigInt("FFFFFFFFFFFFFFFF", 16)).toLong + low } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/NeuroglancerPrecomputedShardingUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/NeuroglancerPrecomputedShardingUtils.scala index 32a57cacc75..1e964da75b4 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/NeuroglancerPrecomputedShardingUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/NeuroglancerPrecomputedShardingUtils.scala @@ -124,10 +124,20 @@ trait NeuroglancerPrecomputedShardingUtils { } def getChunkRange(chunkId: Long, minishardIndex: Array[(Long, Long, Long)])( - implicit ec: ExecutionContext): Fox[NumericRange.Exclusive[Long]] = + implicit ec: ExecutionContext): Fox[NumericRange.Exclusive[Long]] = for { chunkSpecification <- Fox.option2Fox(minishardIndex.find(_._1 == chunkId)) ?~> s"Could not find chunk id $chunkId in minishard index" chunkStart = (shardIndexRange.end) + chunkSpecification._2 chunkEnd = (shardIndexRange.end) + chunkSpecification._2 + chunkSpecification._3 } yield Range.Long(chunkStart, chunkEnd, 1) + + def getChunk(chunkRange: NumericRange[Long], shardPath: VaultPath)(implicit ec: ExecutionContext): Fox[Array[Byte]] = + for { + rawBytes <- shardPath.readBytes(Some(chunkRange)) + bytes = shardingSpecification.data_encoding match { + // Check for GZIP Magic bytes to check if it was already decompressed + case "gzip" if rawBytes(0) == 31 && rawBytes(1) == -117 => ZipIO.gunzip(rawBytes) + case _ => rawBytes + } + } yield bytes } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala index 8e2a558982e..ab536ed0cb0 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala @@ -87,18 +87,10 @@ case class ShardingSpecification(`@type`: String, def hashFunction(input: Long): Long = hash match { case "identity" => input - case "murmurhash3_x86_128" => applyMurmurHash3(input) + case "murmurhash3_x86_128" => MurmurHash3.hash64(longToBytes(input)) case _ => throw new IllegalArgumentException(s"Unsupported hash function: $hash") } - private def applyMurmurHash3(input: Long): Long = { - // he MurmurHash3_x86_128 hash function applied to the shifted chunk ID in little endian encoding. The low 8 bytes of the resultant hash code are treated as a little endian 64-bit number. 
- val bytes = longToBytes(input) - val hash = MurmurHash3_x86_128.hash(bytes, 0) - val result = hash(0) & 0xFFFFFFFFL | (hash(1) & 0xFFFFFFFFL) << 32 - result - } - private lazy val minishardMask = { if (minishard_bits == 0) { 0 @@ -113,7 +105,7 @@ case class ShardingSpecification(`@type`: String, } private lazy val shardMask = { - val oneMask = Long.MinValue // 0xFFFFFFFFFFFFFFFF + val oneMask = 0xFFFFFFFFFFFFFFFFL val cursor = minishard_bits + shard_bits val shardMask = ~((oneMask >> cursor) << cursor) shardMask & (~minishardMask) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala index d01f4b15bc5..50a9d8580c6 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala @@ -304,7 +304,8 @@ class MeshFileService @Inject()(config: DataStoreConfig, dataVaultService: DataV // - Can some sharding stuff be unified between array and mesh? // - Support non sharding meshes? // - Tests? - // - Need to implement murmurhash3_x86_128 + // - Caching + // - Extract stuff for LOD violations /* Note that null is a valid value here for once. Meshfiles with no information about the @@ -482,7 +483,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, dataVaultService: DataV shardUrl = mesh.shardingSpecification.getPathForShard(vaultPath, minishardInfo._1) minishardIndex <- mesh.getMinishardIndex(shardUrl, minishardInfo._2.toInt) chunkRange <- mesh.getChunkRange(segmentId, minishardIndex) - chunk <- shardUrl.readBytes(Some(chunkRange)) + chunk <- mesh.getChunk(chunkRange, shardUrl) segmentManifest = NeuroglancerSegmentManifest.fromBytes(chunk) meshSegmentInfo = enrichSegmentInfo(segmentManifest, meshInfo.lod_scale_multiplier, chunkRange.start, segmentId) transform = meshInfo.transform2DArray diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/NeuroglancerMesh.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/NeuroglancerMesh.scala index 60af07b1c0a..20eec6bb956 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/NeuroglancerMesh.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/NeuroglancerMesh.scala @@ -1,8 +1,11 @@ package com.scalableminds.webknossos.datastore.services -import com.scalableminds.webknossos.datastore.datareaders.precomputed.{NeuroglancerPrecomputedShardingUtils, ShardingSpecification} +import com.scalableminds.webknossos.datastore.datareaders.precomputed.{ + NeuroglancerPrecomputedShardingUtils, + ShardingSpecification +} -case class NeuroglancerMesh(meshInfo: NeuroglancerPrecomputedMeshInfo) extends NeuroglancerPrecomputedShardingUtils{ +case class NeuroglancerMesh(meshInfo: NeuroglancerPrecomputedMeshInfo) extends NeuroglancerPrecomputedShardingUtils { override val shardingSpecification: ShardingSpecification = meshInfo.sharding.get // TODO: Remove get } From b165b911a2cf2185bd8233dfeef25afb6ce0456e Mon Sep 17 00:00:00 2001 From: frcroth Date: Mon, 2 Dec 2024 16:11:37 +0100 Subject: [PATCH 5/9] Move todos to PR description --- .../datastore/services/MeshFileService.scala | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala index 50a9d8580c6..e11e1078ab6 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala @@ -296,17 +296,6 @@ class MeshFileService @Inject()(config: DataStoreConfig, dataVaultService: DataV .toSet } - // TODOs: - // - Move all neuroglancer precomputed mesh handling to different service? - // - Add enum for mesh types - // - ??? Maybe change API to have a mesh object instead of different keys for name, path, type - // - Support tokens? - // - Can some sharding stuff be unified between array and mesh? - // - Support non sharding meshes? - // - Tests? - // - Caching - // - Extract stuff for LOD violations - /* Note that null is a valid value here for once. Meshfiles with no information about the meshFilePath will return Fox.empty, while meshfiles with one marked as empty, will return Fox.successful(null) From 847d17205991b1d6e14e6cd90cc11f895feeed43 Mon Sep 17 00:00:00 2001 From: frcroth Date: Wed, 4 Dec 2024 09:48:19 +0100 Subject: [PATCH 6/9] Implement reading Mesh chunk --- .../controllers/DSMeshController.scala | 10 ++++---- .../services/DSFullMeshService.scala | 5 +++- .../datastore/services/MeshFileService.scala | 24 ++++++++++++++++++- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala index 9a5d4584d06..05093a1f065 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala @@ -98,10 +98,12 @@ class DSMeshController @Inject()( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId)), urlOrHeaderToken(token, request)) { for { - (data, encoding) <- meshFileService.readMeshChunk(organizationId, - datasetDirectoryName, - dataLayerName, - request.body) ?~> "mesh.file.loadChunk.failed" + (data, encoding) <- request.body.meshFileType match { + case Some("neuroglancerPrecomputed") => + meshFileService.readMeshChunkForNeuroglancerPrecomputed(request.body.meshFilePath, request.body.requests) + case _ => + meshFileService.readMeshChunk(organizationId, datasetDirectoryName, dataLayerName, request.body) ?~> "mesh.file.loadChunk.failed" + } } yield { if (encoding.contains("gzip")) { Ok(data).withHeaders("Content-Encoding" -> "gzip") diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSFullMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSFullMeshService.scala index 11c560f31cd..15291298662 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSFullMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSFullMeshService.scala @@ -166,7 +166,10 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, organizationId, datasetDirectoryName, layerName, - MeshChunkDataRequestList(meshfileName, List(MeshChunkDataRequest(chunkInfo.byteOffset, chunkInfo.byteSize))) + MeshChunkDataRequestList(meshfileName, + None, + None, + List(MeshChunkDataRequest(chunkInfo.byteOffset, chunkInfo.byteSize, None))) 
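// Editor's sketch: the widened chunk-request payload as this commit leaves it.
// The field set is copied from the diff; deriving the formats with play-json's
// Json.format is an assumption, mirroring the service's other request objects.
import play.api.libs.json.{Json, OFormat}

case class MeshChunkDataRequestSketch(
    byteOffset: Long,
    byteSize: Int,
    segmentId: Option[Long] // only set for neuroglancer precomputed meshes
)

case class MeshChunkDataRequestListSketch(
    meshFile: String,
    meshFilePath: Option[String], // e.g. a remote precomputed URI (illustrative)
    meshFileType: Option[String], // "neuroglancerPrecomputed" selects the remote path
    requests: Seq[MeshChunkDataRequestSketch]
)

object MeshChunkDataRequestSketch {
  implicit val jsonFormat: OFormat[MeshChunkDataRequestSketch] = Json.format[MeshChunkDataRequestSketch]
}

object MeshChunkDataRequestListSketch {
  implicit val jsonFormat: OFormat[MeshChunkDataRequestListSketch] = Json.format[MeshChunkDataRequestListSketch]
}

// A classic HDF5-style request, as in the call above, leaves the new optional
// fields at None:
//   Json.toJson(MeshChunkDataRequestListSketch("meshfile", None, None,
//     Seq(MeshChunkDataRequestSketch(0L, 1024, None))))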
) ?~> "mesh.file.loadChunk.failed" _ <- bool2Fox(encoding == "draco") ?~> s"meshfile encoding is $encoding, only draco is supported" scale <- tryo(Vec3Double(transform(0)(0), transform(1)(1), transform(2)(2))) ?~> "could not extract scale from meshfile transform attribute" diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala index 958e667e3ce..a1cfc22dd8f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala @@ -49,11 +49,14 @@ object ListMeshChunksRequest { case class MeshChunkDataRequest( byteOffset: Long, - byteSize: Int + byteSize: Int, + segmentId: Option[Long] // Only relevant for neuroglancer precomputed meshes ) case class MeshChunkDataRequestList( meshFile: String, + meshFilePath: Option[String], + meshFileType: Option[String], requests: Seq[MeshChunkDataRequest] ) @@ -530,4 +533,23 @@ class MeshFileService @Inject()(config: DataStoreConfig, dataVaultService: DataV meshFileCache.clear(key => key.startsWith(relevantPath.toString)) } + def readMeshChunkForNeuroglancerPrecomputed( + meshFilePathOpt: Option[String], + meshChunkDataRequests: Seq[MeshChunkDataRequest]): Fox[(Array[Byte], String)] = + for { + meshFilePath <- meshFilePathOpt.toFox ?~> "Mesh file path is required" + vaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(meshFilePath), None)) + meshInfo <- neuroglancerPrecomputedMeshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) + mesh = NeuroglancerMesh(meshInfo) + + segmentId <- Fox.option2Fox(meshChunkDataRequests.head.segmentId) ?~> "Segment id parameter is required" // This assumes that all requests are for the same segment + + minishardInfo = mesh.shardingSpecification.getMinishardInfo(segmentId) + shardUrl = mesh.shardingSpecification.getPathForShard(vaultPath, minishardInfo._1) + chunks <- Fox.serialCombined(meshChunkDataRequests.toList)(request => + shardUrl.readBytes(Some(request.byteOffset until request.byteOffset + request.byteSize))) + encoding = "identity" + output = chunks.flatten.toArray + } yield (output, encoding) + } From 90a52e40211795131efc041445a5eed2d972bac2 Mon Sep 17 00:00:00 2001 From: frcroth Date: Wed, 4 Dec 2024 11:56:06 +0100 Subject: [PATCH 7/9] Implement loading stl --- .../controllers/DSMeshController.scala | 5 +- .../services/DSFullMeshService.scala | 70 +++++++++++++++---- .../datastore/services/MeshFileService.scala | 2 +- 3 files changed, 62 insertions(+), 15 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala index 05093a1f065..1e2ae279a2d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala @@ -76,8 +76,9 @@ class DSMeshController @Inject()( ) chunkInfos <- request.body.meshFileType match { case Some("neuroglancerPrecomputed") => - meshFileService.listMeshChunksForNeuroglancerPrecomputedMesh(request.body.meshFilePath, - request.body.segmentId) + meshFileService.listMeshChunksForNeuroglancerPrecomputedMesh( + request.body.meshFilePath, + request.body.segmentId) // TODO: Pass 
segmentIds here case _ => meshFileService.listMeshChunksForSegmentsMerged(organizationId, datasetDirectoryName, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSFullMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSFullMeshService.scala index 15291298662..8e1ad265db7 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSFullMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSFullMeshService.scala @@ -25,7 +25,9 @@ case class FullMeshRequest( editableMappingTracingId: Option[String], mag: Option[Vec3Int], // required for ad-hoc meshing seedPosition: Option[Vec3Int], // required for ad-hoc meshing - additionalCoordinates: Option[Seq[AdditionalCoordinate]] + additionalCoordinates: Option[Seq[AdditionalCoordinate]], + meshFilePath: Option[String], // required for remote neuroglancer precomputed mesh files + meshFileType: Option[String] ) object FullMeshRequest { @@ -55,6 +57,8 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, dataLayerName: String, fullMeshRequest: FullMeshRequest)(implicit ec: ExecutionContext, m: MessagesProvider): Fox[Array[Byte]] = fullMeshRequest.meshFileName match { + case Some(_) if fullMeshRequest.meshFilePath.isDefined => + loadFullMeshFromRemoteNeuroglancerMeshFile(organizationId, datasetDirectoryName, dataLayerName, fullMeshRequest) case Some(_) => loadFullMeshFromMeshfile(token, organizationId, datasetDirectoryName, dataLayerName, fullMeshRequest) case None => loadFullMeshFromAdHoc(organizationId, datasetDirectoryName, dataLayerName, fullMeshRequest) @@ -149,7 +153,10 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, layerName, meshFileName, chunkRange, - chunkInfos.transform) + chunkInfos.transform, + None, + None, + None) } stlOutput = combineEncodedChunksToStl(stlEncodedChunks) _ = logMeshingDuration(before, "meshfile", stlOutput.length) @@ -160,17 +167,28 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, layerName: String, meshfileName: String, chunkInfo: MeshChunk, - transform: Array[Array[Double]])(implicit ec: ExecutionContext): Fox[Array[Byte]] = + transform: Array[Array[Double]], + meshFileType: Option[String], + meshFilePath: Option[String], + segmentId: Option[Long])(implicit ec: ExecutionContext): Fox[Array[Byte]] = for { - (dracoMeshChunkBytes, encoding) <- meshFileService.readMeshChunk( - organizationId, - datasetDirectoryName, - layerName, - MeshChunkDataRequestList(meshfileName, - None, - None, - List(MeshChunkDataRequest(chunkInfo.byteOffset, chunkInfo.byteSize, None))) - ) ?~> "mesh.file.loadChunk.failed" + (dracoMeshChunkBytes, encoding) <- meshFileType match { + case Some("neuroglancerPrecomputed") => + meshFileService.readMeshChunkForNeuroglancerPrecomputed( + meshFilePath, + Seq(MeshChunkDataRequest(chunkInfo.byteOffset, chunkInfo.byteSize, segmentId)) + ) ?~> "mesh.file.loadChunk.failed" + case None => + meshFileService.readMeshChunk( + organizationId, + datasetDirectoryName, + layerName, + MeshChunkDataRequestList(meshfileName, + None, + None, + List(MeshChunkDataRequest(chunkInfo.byteOffset, chunkInfo.byteSize, None))) + ) ?~> "mesh.file.loadChunk.failed" + } _ <- bool2Fox(encoding == "draco") ?~> s"meshfile encoding is $encoding, only draco is supported" scale <- tryo(Vec3Double(transform(0)(0), transform(1)(1), transform(2)(2))) ?~> "could not extract scale from meshfile transform 
attribute"
       stlEncodedChunk <- tryo(
@@ -183,4 +201,32 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository,
           scale.z))
     } yield stlEncodedChunk
 
+  private def loadFullMeshFromRemoteNeuroglancerMeshFile(
+      organizationId: String,
+      datasetDirectoryName: String,
+      layerName: String,
+      fullMeshRequest: FullMeshRequest)(implicit ec: ExecutionContext, m: MessagesProvider): Fox[Array[Byte]] =
+    for {
+      // TODO: Mapping, segmentIds
+      chunkInfos: WebknossosSegmentInfo <- meshFileService.listMeshChunksForNeuroglancerPrecomputedMesh(
+        fullMeshRequest.meshFilePath,
+        fullMeshRequest.segmentId
+      )
+      allChunkRanges: List[MeshChunk] = chunkInfos.chunks.lods.head.chunks
+      stlEncodedChunks: Seq[Array[Byte]] <- Fox.serialCombined(allChunkRanges) { chunkRange: MeshChunk =>
+        readMeshChunkAsStl(
+          organizationId,
+          datasetDirectoryName,
+          layerName,
+          fullMeshRequest.meshFileName.get,
+          chunkRange,
+          chunkInfos.transform,
+          fullMeshRequest.meshFileType,
+          fullMeshRequest.meshFilePath,
+          Some(fullMeshRequest.segmentId)
+        )
+      }
+      stlOutput = combineEncodedChunksToStl(stlEncodedChunks)
+    } yield stlOutput
+
 }
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala
index a1cfc22dd8f..455b8626379 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala
@@ -548,7 +548,7 @@
       shardUrl = mesh.shardingSpecification.getPathForShard(vaultPath, minishardInfo._1)
       chunks <- Fox.serialCombined(meshChunkDataRequests.toList)(request =>
         shardUrl.readBytes(Some(request.byteOffset until request.byteOffset + request.byteSize)))
-      encoding = "identity"
+      encoding = "draco"
       output = chunks.flatten.toArray
     } yield (output, encoding)
 
From fa7e122d0d650c33d0135140cadddf999e1fdf4c Mon Sep 17 00:00:00 2001
From: frcroth
Date: Wed, 4 Dec 2024 13:35:02 +0100
Subject: [PATCH 8/9] Adapt frontend

---
 frontend/javascripts/admin/api/mesh.ts          |  7 +++++++
 .../javascripts/oxalis/model/sagas/mesh_saga.ts | 16 +++++++++++++---
 frontend/javascripts/types/api_flow_types.ts    |  2 ++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/frontend/javascripts/admin/api/mesh.ts b/frontend/javascripts/admin/api/mesh.ts
index d64f2d7d592..673e5bf0aaa 100644
--- a/frontend/javascripts/admin/api/mesh.ts
+++ b/frontend/javascripts/admin/api/mesh.ts
@@ -44,6 +44,8 @@ export function getMeshfileChunksForSegment(
   // editableMappingTracingId should be the tracing id, not the editable mapping id.
   // If this is set, it is assumed that the request is about an editable mapping.
editableMappingTracingId: string | null | undefined, + meshFileType: string | null | undefined, + meshFilePath: string | null | undefined, ): Promise { return doWithToken((token) => { const params = new URLSearchParams(); @@ -60,6 +62,8 @@ export function getMeshfileChunksForSegment( data: { meshFile, segmentId, + meshFileType, + meshFilePath, }, showErrorToast: false, }, @@ -70,10 +74,13 @@ export function getMeshfileChunksForSegment( type MeshChunkDataRequest = { byteOffset: number; byteSize: number; + segmentId: number | null; // Only relevant for neuroglancer precomputed meshes }; type MeshChunkDataRequestList = { meshFile: string; + meshFileType: string | null | undefined; + meshFilePath: string | null | undefined; requests: MeshChunkDataRequest[]; }; diff --git a/frontend/javascripts/oxalis/model/sagas/mesh_saga.ts b/frontend/javascripts/oxalis/model/sagas/mesh_saga.ts index 2fdf47b3710..0cb6f2972c2 100644 --- a/frontend/javascripts/oxalis/model/sagas/mesh_saga.ts +++ b/frontend/javascripts/oxalis/model/sagas/mesh_saga.ts @@ -846,6 +846,7 @@ function* loadPrecomputedMeshForSegmentId( scale, additionalCoordinates, mergeChunks, + id, ); try { @@ -882,7 +883,7 @@ function* _getChunkLoadingDescriptors( const { segmentMeshController } = getSceneController(); const version = meshFile.formatVersion; - const { meshFileName } = meshFile; + const { meshFileName, meshFileType, meshFilePath } = meshFile; const editableMapping = yield* select((state) => getEditableMappingForVolumeTracingId(state, segmentationLayer.tracingId), @@ -916,6 +917,8 @@ function* _getChunkLoadingDescriptors( // without a mapping. meshFile.mappingName == null ? mappingName : null, editableMapping != null && tracing ? tracing.tracingId : null, + meshFileType, + meshFilePath, ); scale = [segmentInfo.transform[0][0], segmentInfo.transform[1][1], segmentInfo.transform[2][2]]; segmentInfo.chunks.lods.forEach((chunks, lodIndex) => { @@ -951,9 +954,10 @@ function _getLoadChunksTasks( scale: Vector3 | null, additionalCoordinates: AdditionalCoordinate[] | null, mergeChunks: boolean, + segmentId: number, ) { const { segmentMeshController } = getSceneController(); - const { meshFileName } = meshFile; + const { meshFileName, meshFileType, meshFilePath } = meshFile; const loader = getDracoLoader(); return _.compact( _.flatten( @@ -981,8 +985,14 @@ function _getLoadChunksTasks( getBaseSegmentationName(segmentationLayer), { meshFile: meshFileName, + meshFileType, + meshFilePath, // Only extract the relevant properties - requests: chunks.map(({ byteOffset, byteSize }) => ({ byteOffset, byteSize })), + requests: chunks.map(({ byteOffset, byteSize }) => ({ + byteOffset, + byteSize, + segmentId: segmentId, + })), }, ); diff --git a/frontend/javascripts/types/api_flow_types.ts b/frontend/javascripts/types/api_flow_types.ts index e3fb3502ed8..046f0ceff4d 100644 --- a/frontend/javascripts/types/api_flow_types.ts +++ b/frontend/javascripts/types/api_flow_types.ts @@ -892,6 +892,8 @@ export type APIMeshFile = { // 1-2 - the format should behave as v0 (refer to voxelytics for actual differences) // 3 - is the newer version with draco encoding. 
formatVersion: number;
+  meshFileType: string | null | undefined;
+  meshFilePath: string | null | undefined;
 };
 export type APIConnectomeFile = {
   connectomeFileName: string;

From 51ae8c6d3f47843bc8122b1503c64c7153d1f440 Mon Sep 17 00:00:00 2001
From: frcroth
Date: Mon, 16 Dec 2024 13:28:38 +0100
Subject: [PATCH 9/9] Use selected lod

---
 .../datastore/services/DSFullMeshService.scala  |  7 ++++---
 .../datastore/services/MeshFileService.scala    | 17 +++++++----------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSFullMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSFullMeshService.scala
index 8e1ad265db7..2b3ac4c5cf2 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSFullMeshService.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSFullMeshService.scala
@@ -178,7 +178,7 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository,
             meshFilePath,
             Seq(MeshChunkDataRequest(chunkInfo.byteOffset, chunkInfo.byteSize, segmentId))
           ) ?~> "mesh.file.loadChunk.failed"
-      case None =>
+      case _ =>
         meshFileService.readMeshChunk(
           organizationId,
           datasetDirectoryName,
@@ -212,7 +212,8 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository,
         fullMeshRequest.meshFilePath,
         fullMeshRequest.segmentId
       )
-      allChunkRanges: List[MeshChunk] = chunkInfos.chunks.lods.head.chunks
+      selectedLod = fullMeshRequest.lod.getOrElse(0)
+      allChunkRanges: List[MeshChunk] = chunkInfos.chunks.lods(selectedLod).chunks
       stlEncodedChunks: Seq[Array[Byte]] <- Fox.serialCombined(allChunkRanges) { chunkRange: MeshChunk =>
         readMeshChunkAsStl(
           organizationId,
@@ -220,7 +221,7 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository,
           layerName,
           fullMeshRequest.meshFileName.get,
           chunkRange,
-          chunkInfos.transform,
+          Array(Array(1.0, 0.0, 0.0), Array(0.0, 1.0, 0.0), Array(0.0, 0.0, 1.0)), // identity transform; elements must be Double to match the Array[Array[Double]] parameter
           fullMeshRequest.meshFileType,
           fullMeshRequest.meshFilePath,
           Some(fullMeshRequest.segmentId)
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala
index 455b8626379..70904d6cc19 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MeshFileService.scala
@@ -4,7 +4,6 @@ import com.google.common.io.LittleEndianDataInputStream
 import com.scalableminds.util.cache.AlfuCache
 import com.scalableminds.util.geometry.{Vec3Float, Vec3Int}
 import com.scalableminds.util.io.PathUtils
-import com.scalableminds.util.tools.JsonHelper.bool2Box
 import com.scalableminds.util.tools.{ByteUtils, Fox, FoxImplicits, JsonHelper}
 import com.scalableminds.webknossos.datastore.DataStoreConfig
 import com.scalableminds.webknossos.datastore.datareaders.precomputed.ShardingSpecification
@@ -25,6 +24,7 @@ import com.scalableminds.webknossos.datastore.storage.{
 import com.typesafe.scalalogging.LazyLogging
 import net.liftweb.common.Box
 import net.liftweb.common.Box.tryo
+import net.liftweb.common.Full
 import org.apache.commons.io.FilenameUtils
 import play.api.i18n.{Messages, MessagesProvider}
 import play.api.libs.json.{Json, OFormat}
@@ -480,7 +480,8 @@ class MeshFileService @Inject()(config: DataStoreConfig, dataVaultService: DataV
       chunk <- mesh.getChunk(chunkRange, shardUrl)
       segmentManifest = NeuroglancerSegmentManifest.fromBytes(chunk)
       meshSegmentInfo = enrichSegmentInfo(segmentManifest, meshInfo.lod_scale_multiplier, chunkRange.start, segmentId)
-      transform = meshInfo.transform2DArray
+      transform = meshInfo.transform2DArray // Something is going wrong here: this transform places the meshes far outside the rest of the data
+      //transform = Array(Array(2.0, 0.0, 0.0, 0.0), Array(0.0, 2.0, 0.0, 0.0), Array(0.0, 0.0, 2.0, 0.0))
       encoding = "draco"
       wkChunkInfos <- WebknossosSegmentInfo.fromMeshInfosAndMetadata(List(meshSegmentInfo), encoding, transform)
     } yield wkChunkInfos
@@ -511,20 +512,16 @@ class MeshFileService @Inject()(config: DataStoreConfig, dataVaultService: DataV
     // Sort the requests by byte offset to optimize for spinning disk access
     val requestsReordered = meshChunkDataRequests.requests.zipWithIndex.sortBy(requestAndIndex => requestAndIndex._1.byteOffset).toList
-    val data: List[(Array[Byte], String, Int)] = requestsReordered.map { requestAndIndex =>
+    val data: List[(Array[Byte], Int)] = requestsReordered.map { requestAndIndex =>
       val meshChunkDataRequest = requestAndIndex._1
       val data = cachedMeshFile.uint8Reader.readArrayBlockWithOffset("neuroglancer",
                                                                      meshChunkDataRequest.byteSize,
                                                                      meshChunkDataRequest.byteOffset)
-      (data, meshFormat, requestAndIndex._2)
+      (data, requestAndIndex._2)
     }
-    val dataSorted = data.sortBy(d => d._3)
-    for {
-      _ <- bool2Box(data.map(d => d._2).toSet.size == 1) ?~! "Different encodings for the same mesh chunk request found."
-      encoding <- data.map(d => d._2).headOption
-      output = dataSorted.flatMap(d => d._1).toArray
-    } yield (output, encoding)
+    val dataSorted = data.sortBy(d => d._2)
+    Full((dataSorted.flatMap(d => d._1).toArray, meshFormat))
   }
 
   def clearCache(organizationId: String, datasetDirectoryName: String, layerNameOpt: Option[String]): Int = {
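The simplified readMeshChunk above keeps the sort-by-offset/restore-by-original-index pattern. Its contract is easy to state in isolation; a standalone sketch, with plain byte-array reads standing in for the HDF5 reader and all names illustrative:

    object ReorderedReadsSketch {
      final case class Req(byteOffset: Long, byteSize: Int)

      // Read all requested ranges sorted by offset (friendly to spinning disks),
      // then restore the caller's original request order via the carried index.
      def readAll(file: Array[Byte], requests: Seq[Req]): Array[Byte] = {
        val sortedByOffset = requests.zipWithIndex.sortBy(_._1.byteOffset)
        val chunks = sortedByOffset.map {
          case (r, originalIndex) =>
            (file.slice(r.byteOffset.toInt, r.byteOffset.toInt + r.byteSize), originalIndex)
        }
        chunks.sortBy(_._2).flatMap(_._1).toArray
      }

      def main(args: Array[String]): Unit = {
        val file = Array.tabulate[Byte](16)(_.toByte)
        // Out-of-order requests come back in request order, not offset order.
        assert(readAll(file, Seq(Req(8, 2), Req(0, 2))).sameElements(Array[Byte](8, 9, 0, 1)))
      }
    }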