Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic alignment computation for SIMD error functions #90

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions .github/workflows/ci_ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,18 @@ on:

jobs:
momentum:
name: momentum-ubuntu
name: momentum-simd:${{ matrix.simd }}-ubuntu
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- simd: "ON"
pymomentum: "ON"
- simd: "OFF"
pymomentum: "OFF"
env:
MOMENTUM_ENABLE_SIMD: ${{ matrix.simd }}
steps:
- name: Checkout
uses: actions/checkout@v4
Expand All @@ -25,10 +35,14 @@ jobs:
cache: true
- name: Build and test Momentum
run: |
pixi run test
MOMENTUM_BUILD_PYMOMENTUM=$MOMENTUM_BUILD_PYMOMENTUM \
MOMENTUM_ENABLE_SIMD=$MOMENTUM_ENABLE_SIMD \
pixi run test
- name: Install Momentum and Build hello_world
run: |
pixi run install
MOMENTUM_BUILD_PYMOMENTUM=$MOMENTUM_BUILD_PYMOMENTUM \
MOMENTUM_ENABLE_SIMD=$MOMENTUM_ENABLE_SIMD \
pixi run install
pixi run cmake \
-S momentum/examples/hello_world \
-B momentum/examples/hello_world/build \
Expand Down
3 changes: 3 additions & 0 deletions cmake/mt_defs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,9 @@ function(mt_library)
else()
target_compile_options(${_ARG_NAME} ${type} -march=native)
endif()
target_compile_definitions(${_ARG_NAME}
${type} -DMOMENTUM_ENABLE_SIMD=1
)
endif()
target_compile_options(${_ARG_NAME}
${type} ${_ARG_PUBLIC_COMPILE_OPTIONS}
Expand Down
62 changes: 32 additions & 30 deletions momentum/character_solver/simd_normal_error_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,15 @@

namespace momentum {

namespace {

template <typename Derived>
void ensureAligned(const Eigen::MatrixBase<Derived>& mat, size_t& addressOffset) {
if (addressOffset == 8)
addressOffset = 0;
MT_CHECK(((uintptr_t)(&mat(addressOffset, 0))) % 32 == 0);
}

} // namespace

SimdNormalConstraints::SimdNormalConstraints(const Skeleton* skel) {
constexpr uintptr_t kAlignment = 32;

// resize all arrays to the number of joints
MT_CHECK(skel->joints.size() <= kMaxJoints);
numJoints = static_cast<int>(skel->joints.size());
const auto dataSize = kMaxConstraints * numJoints;
MT_CHECK(dataSize % kAlignment == 0);
MT_CHECK(dataSize % kSimdAlignment == 0);

// create memory for all floats
data = makeAlignedUnique<float, kAlignment>(dataSize * 10);
data = makeAlignedUnique<float, kSimdAlignment>(dataSize * 10);
float* dataPtr = data.get();

offsetX = dataPtr + dataSize * 0;
Expand Down Expand Up @@ -169,7 +156,7 @@ double SimdNormalErrorFunction::getError(
}
}

// sum the avx error register for final result
// sum the SIMD error register for final result
return static_cast<float>(drjit::sum(error) * kPlaneWeight * weight_);
}

Expand Down Expand Up @@ -310,9 +297,8 @@ double SimdNormalErrorFunction::getJacobian(
// storage for joint errors
std::vector<double> jointErrors(constraints_->numJoints);

// need to make sure we're actually at a 32 byte data offset at the first offset for avx access
size_t addressOffset = 8 - (((size_t)jacobian.data() % 32) / 4);
ensureAligned(jacobian, addressOffset);
// need to make sure we're actually at a 32 byte data offset at the first offset for SIMD access
checkAlignment<kSimdAlignment>(jacobian);

// loop over all joints, as these are our base units
auto dispensoOptions = dispenso::ParForOptions();
Expand Down Expand Up @@ -413,7 +399,7 @@ double SimdNormalErrorFunction::getJacobian(

usedRows = gsl::narrow_cast<int>(jacobian.rows());

// sum the avx error register for final result
// sum the SIMD error register for final result
return static_cast<float>(error);
}

Expand All @@ -426,11 +412,11 @@ size_t SimdNormalErrorFunction::getJacobianSize() const {
size_t num = 0;
for (int i = 0; i < constraints_->numJoints; i++) {
jacobianOffset_[i] = num;
const auto numPackets =
(constraints_->constraintCount[i] + kSimdPacketSize - 1) / kSimdPacketSize;
const size_t count = constraints_->constraintCount[i];
const auto numPackets = (count + kSimdPacketSize - 1) / kSimdPacketSize;
num += numPackets * kSimdPacketSize;
}
return num + kSimdPacketSize;
return num;
}

#ifdef MOMENTUM_ENABLE_AVX
Expand Down Expand Up @@ -468,9 +454,8 @@ double SimdNormalErrorFunctionAVX::getJacobian(
// storage for joint errors
std::vector<double> ets_error;

// need to make sure we're actually at a 32 byte data offset at the first offset for avx access
size_t addressOffset = 8 - (((size_t)jacobian.data() % 32) / 4);
ensureAligned(jacobian, addressOffset);
// need to make sure we're actually at a 32 byte data offset at the first offset for AVX access
checkAlignment<kAvxAlignment>(jacobian);

// loop over all joints, as these are our base units
auto dispensoOptions = dispenso::ParForOptions();
Expand All @@ -482,7 +467,7 @@ double SimdNormalErrorFunctionAVX::getJacobian(
constraints_->numJoints,
[&](double& error_local, const size_t jointId) {
// get initial offset
const auto offset = jacobianOffset_[jointId] + addressOffset;
const auto offset = jacobianOffset_[jointId];

// pre-load some joint specific values
const auto& transformation = state.jointState[jointId].transformation;
Expand All @@ -497,9 +482,9 @@ double SimdNormalErrorFunctionAVX::getJacobian(

const auto jointOffset = jointId * SimdNormalConstraints::kMaxConstraints;

// loop over all constraints in increments of 8
// loop over all constraints in increments of kAvxPacketSize
const uint32_t count = constraints_->constraintCount[jointId];
for (uint32_t index = 0; index < count; index += 8) {
for (uint32_t index = 0; index < count; index += kAvxPacketSize) {
// transform offset by joint transformation : pos = transform * offset
// also transform normal by transformation : nml = (transform.linear() *
// normal).normalized()
Expand Down Expand Up @@ -687,9 +672,26 @@ double SimdNormalErrorFunctionAVX::getJacobian(

usedRows = gsl::narrow_cast<int>(jacobian.rows());

// sum the avx error register for final result
// sum the AVX error register for final result
return static_cast<float>(error);
}

size_t SimdNormalErrorFunctionAVX::getJacobianSize() const {
if (constraints_ == nullptr) {
return 0;
}

jacobianOffset_.resize(constraints_->numJoints);
size_t num = 0;
for (int i = 0; i < constraints_->numJoints; i++) {
jacobianOffset_[i] = num;
const size_t count = constraints_->constraintCount[i];
const auto numPackets = (count + kAvxPacketSize - 1) / kAvxPacketSize;
num += numPackets * kAvxPacketSize;
}
return num;
}

#endif // MOMENTUM_ENABLE_AVX

} // namespace momentum
6 changes: 5 additions & 1 deletion momentum/character_solver/simd_normal_error_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class SimdNormalErrorFunction : public SkeletonErrorFunction {
Ref<VectorXf> residual,
int& usedRows) override;

[[nodiscard]] size_t getJacobianSize() const final;
[[nodiscard]] size_t getJacobianSize() const override;

void setConstraints(const SimdNormalConstraints* cstrs) {
constraints_ = cstrs;
Expand All @@ -120,6 +120,7 @@ class SimdNormalErrorFunction : public SkeletonErrorFunction {
};

#ifdef MOMENTUM_ENABLE_AVX

// A version of SimdNormalErrorFunction where the Jacobian has been hand-unrolled using
// AVX instructions. On some platforms this performs better than the generic SIMD version
// but it only works on Intel platforms that support AVX.
Expand All @@ -141,7 +142,10 @@ class SimdNormalErrorFunctionAVX : public SimdNormalErrorFunction {
Ref<MatrixXf> jacobian,
Ref<VectorXf> residual,
int& usedRows) final;

[[nodiscard]] size_t getJacobianSize() const final;
};

#endif // MOMENTUM_ENABLE_AVX

} // namespace momentum
Loading