Skip to content

Commit

Permalink
Fix memory alignment checks in SIMD error functions (#90)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #90

Previously, a static alignment value of 32 was used for AVX in SIMD implementations. This diff modifies this approach to dynamically determine the correct alignment value via DrJit at compile-time, ensuring compatibility across different SIMD instruction sets.

Detailed changes:

- Add constants for packet sizes and alignment sizes for both DrJit and AVX versions, rather than using magic numbers
- Centralize `checkAlignment()`
- Fix `getJacobianSize()` for both DrJit and AVX versions, correctly utilizing the package size
- Update tests to check around the number of constraints around the package size to ensure the new `getJacobianSize()` works
- Update GitHub CI to test both SIMD-enabled and disabled

NOTE: This Diff the second trial of D47103616

Differential Revision: D63658278
  • Loading branch information
jeongseok-meta authored and facebook-github-bot committed Oct 1, 2024
1 parent 5780aa2 commit db0b7b5
Show file tree
Hide file tree
Showing 11 changed files with 258 additions and 139 deletions.
20 changes: 17 additions & 3 deletions .github/workflows/ci_ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,18 @@ on:

jobs:
momentum:
name: momentum-ubuntu
name: momentum-simd:${{ matrix.simd }}-ubuntu
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- simd: "ON"
pymomentum: "ON"
- simd: "OFF"
pymomentum: "OFF"
env:
MOMENTUM_ENABLE_SIMD: ${{ matrix.simd }}
steps:
- name: Checkout
uses: actions/checkout@v4
Expand All @@ -25,10 +35,14 @@ jobs:
cache: true
- name: Build and test Momentum
run: |
pixi run test
MOMENTUM_BUILD_PYMOMENTUM=$MOMENTUM_BUILD_PYMOMENTUM \
MOMENTUM_ENABLE_SIMD=$MOMENTUM_ENABLE_SIMD \
pixi run test
- name: Install Momentum and Build hello_world
run: |
pixi run install
MOMENTUM_BUILD_PYMOMENTUM=$MOMENTUM_BUILD_PYMOMENTUM \
MOMENTUM_ENABLE_SIMD=$MOMENTUM_ENABLE_SIMD \
pixi run install
pixi run cmake \
-S momentum/examples/hello_world \
-B momentum/examples/hello_world/build \
Expand Down
62 changes: 32 additions & 30 deletions momentum/character_solver/simd_normal_error_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,15 @@

namespace momentum {

namespace {

template <typename Derived>
void ensureAligned(const Eigen::MatrixBase<Derived>& mat, size_t& addressOffset) {
if (addressOffset == 8)
addressOffset = 0;
MT_CHECK(((uintptr_t)(&mat(addressOffset, 0))) % 32 == 0);
}

} // namespace

SimdNormalConstraints::SimdNormalConstraints(const Skeleton* skel) {
constexpr uintptr_t kAlignment = 32;

// resize all arrays to the number of joints
MT_CHECK(skel->joints.size() <= kMaxJoints);
numJoints = static_cast<int>(skel->joints.size());
const auto dataSize = kMaxConstraints * numJoints;
MT_CHECK(dataSize % kAlignment == 0);
MT_CHECK(dataSize % kSimdAlignment == 0);

// create memory for all floats
data = makeAlignedUnique<float, kAlignment>(dataSize * 10);
data = makeAlignedUnique<float, kSimdAlignment>(dataSize * 10);
float* dataPtr = data.get();

offsetX = dataPtr + dataSize * 0;
Expand Down Expand Up @@ -169,7 +156,7 @@ double SimdNormalErrorFunction::getError(
}
}

// sum the avx error register for final result
// sum the SIMD error register for final result
return static_cast<float>(drjit::sum(error) * kPlaneWeight * weight_);
}

Expand Down Expand Up @@ -310,9 +297,8 @@ double SimdNormalErrorFunction::getJacobian(
// storage for joint errors
std::vector<double> jointErrors(constraints_->numJoints);

// need to make sure we're actually at a 32 byte data offset at the first offset for avx access
size_t addressOffset = 8 - (((size_t)jacobian.data() % 32) / 4);
ensureAligned(jacobian, addressOffset);
// need to make sure we're actually at a 32 byte data offset at the first offset for SIMD access
checkAlignment<kSimdAlignment>(jacobian);

// loop over all joints, as these are our base units
auto dispensoOptions = dispenso::ParForOptions();
Expand Down Expand Up @@ -413,7 +399,7 @@ double SimdNormalErrorFunction::getJacobian(

usedRows = gsl::narrow_cast<int>(jacobian.rows());

// sum the avx error register for final result
// sum the SIMD error register for final result
return static_cast<float>(error);
}

Expand All @@ -426,11 +412,11 @@ size_t SimdNormalErrorFunction::getJacobianSize() const {
size_t num = 0;
for (int i = 0; i < constraints_->numJoints; i++) {
jacobianOffset_[i] = num;
const auto numPackets =
(constraints_->constraintCount[i] + kSimdPacketSize - 1) / kSimdPacketSize;
const size_t count = constraints_->constraintCount[i];
const auto numPackets = (count + kSimdPacketSize - 1) / kSimdPacketSize;
num += numPackets * kSimdPacketSize;
}
return num + kSimdPacketSize;
return num;
}

#ifdef MOMENTUM_ENABLE_AVX
Expand Down Expand Up @@ -468,9 +454,8 @@ double SimdNormalErrorFunctionAVX::getJacobian(
// storage for joint errors
std::vector<double> ets_error;

// need to make sure we're actually at a 32 byte data offset at the first offset for avx access
size_t addressOffset = 8 - (((size_t)jacobian.data() % 32) / 4);
ensureAligned(jacobian, addressOffset);
// need to make sure we're actually at a 32 byte data offset at the first offset for AVX access
checkAlignment<kAvxAlignment>(jacobian);

// loop over all joints, as these are our base units
auto dispensoOptions = dispenso::ParForOptions();
Expand All @@ -482,7 +467,7 @@ double SimdNormalErrorFunctionAVX::getJacobian(
constraints_->numJoints,
[&](double& error_local, const size_t jointId) {
// get initial offset
const auto offset = jacobianOffset_[jointId] + addressOffset;
const auto offset = jacobianOffset_[jointId];

// pre-load some joint specific values
const auto& transformation = state.jointState[jointId].transformation;
Expand All @@ -497,9 +482,9 @@ double SimdNormalErrorFunctionAVX::getJacobian(

const auto jointOffset = jointId * SimdNormalConstraints::kMaxConstraints;

// loop over all constraints in increments of 8
// loop over all constraints in increments of kAvxPacketSize
const uint32_t count = constraints_->constraintCount[jointId];
for (uint32_t index = 0; index < count; index += 8) {
for (uint32_t index = 0; index < count; index += kAvxPacketSize) {
// transform offset by joint transformation : pos = transform * offset
// also transform normal by transformation : nml = (transform.linear() *
// normal).normalized()
Expand Down Expand Up @@ -687,9 +672,26 @@ double SimdNormalErrorFunctionAVX::getJacobian(

usedRows = gsl::narrow_cast<int>(jacobian.rows());

// sum the avx error register for final result
// sum the AVX error register for final result
return static_cast<float>(error);
}

size_t SimdNormalErrorFunctionAVX::getJacobianSize() const {
if (constraints_ == nullptr) {
return 0;
}

jacobianOffset_.resize(constraints_->numJoints);
size_t num = 0;
for (int i = 0; i < constraints_->numJoints; i++) {
jacobianOffset_[i] = num;
const size_t count = constraints_->constraintCount[i];
const auto numPackets = (count + kAvxPacketSize - 1) / kAvxPacketSize;
num += numPackets * kAvxPacketSize;
}
return num;
}

#endif // MOMENTUM_ENABLE_AVX

} // namespace momentum
6 changes: 5 additions & 1 deletion momentum/character_solver/simd_normal_error_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class SimdNormalErrorFunction : public SkeletonErrorFunction {
Ref<VectorXf> residual,
int& usedRows) override;

[[nodiscard]] size_t getJacobianSize() const final;
[[nodiscard]] size_t getJacobianSize() const override;

void setConstraints(const SimdNormalConstraints* cstrs) {
constraints_ = cstrs;
Expand All @@ -120,6 +120,7 @@ class SimdNormalErrorFunction : public SkeletonErrorFunction {
};

#ifdef MOMENTUM_ENABLE_AVX

// A version of SimdNormalErrorFunction where the Jacobian has been hand-unrolled using
// AVX instructions. On some platforms this performs better than the generic SIMD version
// but it only works on Intel platforms that support AVX.
Expand All @@ -141,7 +142,10 @@ class SimdNormalErrorFunctionAVX : public SimdNormalErrorFunction {
Ref<MatrixXf> jacobian,
Ref<VectorXf> residual,
int& usedRows) final;

[[nodiscard]] size_t getJacobianSize() const final;
};

#endif // MOMENTUM_ENABLE_AVX

} // namespace momentum
Loading

0 comments on commit db0b7b5

Please sign in to comment.