Skip to content

Commit

Permalink
[Backport release 1.15][python/c++] Add C++ SOMACoordinateSpace and…
Browse files Browse the repository at this point in the history
… use it to push `Scene` metadata down to C++ (#3602)

* [python/cpp] Add C++ `SOMACoordinateSpace` and use it to push `Scene` metadata down to C++ (#3580)

Create a new class `SOMACoordinateSpace` in C++ and use it to read/write coordinate space metadata. Use this class to write coordinate space metadata for the `SOMAScene` class in C++. This replaces writing the metadata in the Python layer and fixes an issue where the `Scene` metadata was being written in a later timestamp than other core metadata like the SOMA datatype.

* std::format -> fmt::format for C++17 on release-1.15 branch

* fix merge

* re-run lint-fixer after merge

---------

Co-authored-by: Julia Dark <[email protected]>
  • Loading branch information
johnkerl and jp-dark authored Jan 21, 2025
1 parent 0f6a252 commit cbccbaa
Show file tree
Hide file tree
Showing 13 changed files with 537 additions and 39 deletions.
27 changes: 14 additions & 13 deletions apis/python/src/tiledbsoma/_scene.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
from ._collection import CollectionBase
from ._constants import (
SOMA_COORDINATE_SPACE_METADATA_KEY,
SOMA_SPATIAL_ENCODING_VERSION,
SOMA_SPATIAL_VERSION_METADATA_KEY,
SPATIAL_DISCLAIMER,
)
from ._exception import SOMAError, map_exception_for_create
Expand Down Expand Up @@ -109,25 +107,28 @@ def create(
warnings.warn(SPATIAL_DISCLAIMER)

context = _validate_soma_tiledb_context(context)

if coordinate_space is None:
axis_names = None
axis_units = None
elif isinstance(coordinate_space, CoordinateSpace):
axis_names = tuple(axis.name for axis in coordinate_space)
axis_units = tuple(axis.unit for axis in coordinate_space)
else:
axis_names = tuple(coordinate_space)
axis_units = None

try:
timestamp_ms = context._open_timestamp_ms(tiledb_timestamp)
clib.SOMAScene.create(
ctx=context.native_context,
uri=uri,
axis_names=axis_names,
axis_units=axis_units,
timestamp=(0, timestamp_ms),
)
handle = cls._wrapper_type.open(uri, "w", context, tiledb_timestamp)
handle.metadata[SOMA_SPATIAL_VERSION_METADATA_KEY] = (
SOMA_SPATIAL_ENCODING_VERSION
)
if coordinate_space is not None:
if not isinstance(coordinate_space, CoordinateSpace):
coordinate_space = CoordinateSpace.from_axis_names(coordinate_space)
handle.meta[SOMA_COORDINATE_SPACE_METADATA_KEY] = (
coordinate_space_to_json(coordinate_space)
)
return cls(
handle,
cls._wrapper_type.open(uri, "w", context, tiledb_timestamp),
_dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code",
)
except SOMAError as e:
Expand Down
20 changes: 19 additions & 1 deletion apis/python/src/tiledbsoma/soma_collection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,16 +99,34 @@ void load_soma_collection(py::module& m) {
"create",
[](std::shared_ptr<SOMAContext> ctx,
std::string_view uri,
std::optional<std::vector<std::string>> axis_names,
std::optional<std::vector<std::optional<std::string>>>
axis_units,
std::optional<TimestampRange> timestamp) {
if (axis_units.has_value() && !axis_names.has_value()) {
throw TileDBSOMAError(
"Cannot provide axis units without axis names.");
}
std::optional<SOMACoordinateSpace> coord_space{std::nullopt};
if (axis_names.has_value()) {
if (axis_units.has_value()) {
coord_space = SOMACoordinateSpace(
axis_names.value(), axis_units.value());
} else {
coord_space = SOMACoordinateSpace(axis_names.value());
}
}
try {
SOMAScene::create(uri, ctx, timestamp);
SOMAScene::create(uri, ctx, coord_space, timestamp);
} catch (const std::exception& e) {
TPY_ERROR_LOC(e.what());
}
},
py::kw_only(),
"ctx"_a,
"uri"_a,
"axis_names"_a,
"axis_units"_a,
"timestamp"_a = py::none())
.def_static(
"open",
Expand Down
37 changes: 15 additions & 22 deletions apis/python/tests/test_scene.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,36 +122,29 @@ def test_scene_basic(tmp_path):
soma.MultiscaleImage.open(uri)


def test_scene_coord_space_at_create(tmp_path):
@pytest.mark.parametrize(
"input,expected",
(
(["x", "y"], soma.CoordinateSpace.from_axis_names(("x", "y"))),
(
soma.CoordinateSpace([soma.Axis("x", "meters"), soma.Axis("y", "meters")]),
soma.CoordinateSpace([soma.Axis("x", "meters"), soma.Axis("y", "meters")]),
),
),
)
def test_scene_coord_space_at_create(tmp_path, input, expected):
uri = tmp_path.as_uri()

coord_space = soma.CoordinateSpace(
[
soma.Axis(name="x"),
soma.Axis(name="y"),
]
)
coord_space_json = """
[
{"name": "x", "unit": null},
{"name": "y", "unit": null}
]
"""

with soma.Scene.create(uri, coordinate_space=("x", "y")) as scene:
with soma.Scene.create(uri, coordinate_space=input) as scene:

# Reserved metadata key should not be settable?
# with pytest.raises(soma.SOMAError):
# scene.metadata["soma_coordinate_space"] = coord_space_json
# scene.metadata["soma_coordinate_space"] = "user_metadata"

scene.coordinate_space = coord_space
assert scene.coordinate_space == coord_space
assert json.loads(scene.metadata["soma_coordinate_space"]) == json.loads(
coord_space_json
)
assert scene.coordinate_space == expected

with soma.Scene.open(uri) as scene:
assert scene.coordinate_space == coord_space
assert scene.coordinate_space == expected


def test_scene_coord_space_after_create(tmp_path):
Expand Down
3 changes: 3 additions & 0 deletions libtiledbsoma/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ set_property(TARGET TILEDBSOMA_NANOARROW_OBJECT PROPERTY POSITION_INDEPENDENT_CO
add_library(TILEDB_SOMA_OBJECTS OBJECT
${CMAKE_CURRENT_SOURCE_DIR}/reindexer/reindexer.cc
${CMAKE_CURRENT_SOURCE_DIR}/soma/managed_query.cc
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_coordinates.cc
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_array.cc
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_group.cc
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_object.cc
Expand Down Expand Up @@ -189,6 +190,7 @@ endif()
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_experiment.h
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_measurement.h
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_scene.h
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_coordinates.h
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_geometry_dataframe.h
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_point_cloud_dataframe.h
# ${CMAKE_CURRENT_SOURCE_DIR}/cpp_api/soma_multiscale_image.h
Expand All @@ -202,6 +204,7 @@ install(FILES
${CMAKE_CURRENT_SOURCE_DIR}/soma/logger_public.h
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_context.h
${CMAKE_CURRENT_SOURCE_DIR}/soma/managed_query.h
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_coordinates.h
${CMAKE_CURRENT_SOURCE_DIR}/soma/array_buffers.h
${CMAKE_CURRENT_SOURCE_DIR}/soma/column_buffer.h
${CMAKE_CURRENT_SOURCE_DIR}/soma/soma_array.h
Expand Down
156 changes: 156 additions & 0 deletions libtiledbsoma/src/soma/soma_coordinates.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/**
* This file defines classes, structs, and helpers for managing coordinate
* spaces and coordinate space transformations.
* @file soma_coordinates.cc
*
* @section LICENSE
*
* The MIT License
*
* @copyright Copyright (c) 2025 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* @section DESCRIPTION
*
* This file defines classes, structs, and helpers for managing coordinate
* spaces and coordinate space transformations.
*/

#include <tiledb/tiledb>
#include <unordered_set>
#include "../utils/logger.h"
#include "nlohmann/json.hpp"

#include "soma_coordinates.h"

using json = nlohmann::json;

namespace nlohmann {
template <>
struct adl_serializer<tiledbsoma::SOMAAxis> {
static void to_json(json& j, const tiledbsoma::SOMAAxis& axis) {
// void to_json(json& j, tiledbsoma::SOMAAxis& axis) {
if (axis.unit.has_value()) {
j = json{{"name", axis.name}, {"unit", axis.unit.value()}};
} else {
j = json{{"name", axis.name}, {"unit", nullptr}};
}
}

static void from_json(const json& j, tiledbsoma::SOMAAxis& axis) {
j.at("name").get_to(axis.name);
auto unit_json = j.at("unit");
if (unit_json.is_null()) {
axis.unit = std::nullopt;
} else {
unit_json.get_to(axis.unit);
}
}
};
} // namespace nlohmann

namespace tiledbsoma {

// Default coordinate space: two axes named "x" and "y", neither with a unit.
SOMACoordinateSpace::SOMACoordinateSpace()
    : axes_{SOMAAxis{"x", std::nullopt}, SOMAAxis{"y", std::nullopt}} {
}

// Build a coordinate space from a list of axes.
//
// Throws TileDBSOMAError if `axes` is empty or if two axes share a name.
SOMACoordinateSpace::SOMACoordinateSpace(const std::vector<SOMAAxis>& axes)
    : axes_(axes) {
    if (axes_.empty()) {
        throw TileDBSOMAError("Coordinate space must have at least one axis.");
    }

    // A failed insertion means the name was already seen, i.e. a duplicate.
    std::unordered_set<std::string> seen_names;
    for (const auto& current : axes_) {
        const bool is_new = seen_names.insert(current.name).second;
        if (!is_new) {
            throw TileDBSOMAError(
                "The name for coordinate space axes must be unique.");
        }
    }
}

// Build a coordinate space from axis names only; every axis gets no unit.
//
// Throws TileDBSOMAError if `axis_names` is empty or contains a duplicate.
SOMACoordinateSpace::SOMACoordinateSpace(
    const std::vector<std::string>& axis_names) {
    const auto axis_count = axis_names.size();
    if (axis_count == 0) {
        throw TileDBSOMAError("Coordinate space must have at least one axis.");
    }

    // Validate uniqueness while filling the axes; a failed insertion means the
    // name was already seen.
    std::unordered_set<std::string> seen_names;
    axes_.reserve(axis_count);
    for (const auto& axis_name : axis_names) {
        if (!seen_names.insert(axis_name).second) {
            throw TileDBSOMAError(
                "The name for coordinate space axes must be unique.");
        }
        axes_.push_back(SOMAAxis{axis_name, std::nullopt});
    }
}

// Build a coordinate space from parallel lists of axis names and axis units.
// `axis_units[i]` is the (optional) unit of the axis named `axis_names[i]`.
//
// Throws TileDBSOMAError if the two lists differ in length, if they are empty,
// or if `axis_names` contains a duplicate. The checks run in that order.
SOMACoordinateSpace::SOMACoordinateSpace(
    const std::vector<std::string>& axis_names,
    const std::vector<std::optional<std::string>>& axis_units) {
    const auto axis_count = axis_names.size();
    if (axis_count != axis_units.size()) {
        throw TileDBSOMAError(
            "[SOMACoordinateSpace]: Axis names and axis units size mismatch. ");
    }
    if (axis_count == 0) {
        throw TileDBSOMAError("Coordinate space must have at least one axis.");
    }

    // Validate uniqueness while filling the axes; a failed insertion means the
    // name was already seen.
    std::unordered_set<std::string> seen_names;
    axes_.reserve(axis_count);
    for (size_t i = 0; i < axis_count; ++i) {
        if (!seen_names.insert(axis_names[i]).second) {
            throw TileDBSOMAError(
                "The name for coordinate space axes must be unique.");
        }
        axes_.push_back(SOMAAxis{axis_names[i], axis_units[i]});
    }
}

// Reconstruct a coordinate space from a raw metadata entry.
//
// `value` is read as `value_num` bytes of JSON text holding an array of axes.
//
// Throws TileDBSOMAError if `value_type` is neither TILEDB_STRING_UTF8 nor
// TILEDB_STRING_ASCII, or if `value` is null. Errors raised by the JSON
// library while parsing or converting are not caught here.
SOMACoordinateSpace SOMACoordinateSpace::from_metadata(
    tiledb_datatype_t value_type, uint32_t value_num, const void* value) {
    const bool is_string_type = value_type == TILEDB_STRING_UTF8 ||
                                value_type == TILEDB_STRING_ASCII;
    if (!is_string_type) {
        throw TileDBSOMAError(fmt::format(
            "[SOMACoordinateSpace]: Unexpected datatype for coordinate space "
            "metadata. Expected {} or {}; got {}",
            tiledb::impl::type_to_str(TILEDB_STRING_UTF8),
            tiledb::impl::type_to_str(TILEDB_STRING_ASCII),
            tiledb::impl::type_to_str(value_type)));
    }
    if (value == nullptr) {
        throw TileDBSOMAError(
            "[SOMACoordinateSpace]: Missing value for coordinate space "
            "metadata.");
    }

    // The metadata buffer is not assumed to be NUL-terminated, so copy exactly
    // `value_num` bytes.
    const std::string serialized(static_cast<const char*>(value), value_num);
    const auto parsed = json::parse(serialized);
    const auto parsed_axes = parsed.get<std::vector<SOMAAxis>>();

    return SOMACoordinateSpace(parsed_axes);
}

std::string SOMACoordinateSpace::to_string() const {
json serializer(axes_);
return serializer.dump(-1, ' ', true);
}
} // namespace tiledbsoma
Loading

0 comments on commit cbccbaa

Please sign in to comment.