From c3252c362c62a71e9fa93a7abfc707bf2af46757 Mon Sep 17 00:00:00 2001 From: Rene Winchenbach <rene.winchenbach@gmail.com> Date: Tue, 12 Nov 2024 10:55:41 +0100 Subject: [PATCH] Moved utils from sub branches into a single branch so all problems are solvable from the primary branch. (#18) --- CMakeLists.txt | 9 + src/util/CollisionDetection.cpp | 456 +++++++++++++++++ src/util/CollisionDetection.h | 62 +++ src/util/CollisionInfo.h | 13 + src/util/pcgsolver.h | 871 ++++++++++++++++++++++++++++++++ 5 files changed, 1411 insertions(+) create mode 100644 src/util/CollisionDetection.cpp create mode 100644 src/util/CollisionDetection.h create mode 100644 src/util/CollisionInfo.h create mode 100644 src/util/pcgsolver.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b4ddcc..29ae811 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,6 +62,15 @@ file(GLOB_RECURSE PIPELINE_HEADERS ) target_sources(Template PRIVATE ${PIPELINE_SOURCES} ${PIPELINE_HEADERS}) +file(GLOB_RECURSE UTIL_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/src/util/*.cpp +) + +file(GLOB_RECURSE UTIL_HEADERS + ${CMAKE_CURRENT_SOURCE_DIR}/src/util/*.h +) +target_sources(Template PRIVATE ${UTIL_SOURCES} ${UTIL_HEADERS}) + if(DEV_MODE) # In dev mode, we load resources from the source tree, so that when we # dynamically edit resources (like shaders), these are correctly diff --git a/src/util/CollisionDetection.cpp b/src/util/CollisionDetection.cpp new file mode 100644 index 0000000..fb027a2 --- /dev/null +++ b/src/util/CollisionDetection.cpp @@ -0,0 +1,456 @@ +#include <vector> +#include <glm/glm.hpp> +#include <glm/gtx/norm.hpp> +#include <iostream> +#include <util/CollisionInfo.h> +#include <util/CollisionDetection.h> +#include <glm/gtx/string_cast.hpp> + +// tool data structures/functions called by the collision detection method, you can ignore the details here +namespace collisionTools +{ + using vec3 = glm::vec3; + using mat4 = glm::mat4; + using vec4 = glm::vec4; + + vec3 getVectorConnnectingCenters(const mat4 &worldFromObj_A, const mat4 &worldFromObj_B) + { + + const vec3 worldCenter_A = worldFromObj_A * vec4(0, 0, 0, 1); + const vec3 worldCenter_B = worldFromObj_B * vec4(0, 0, 0, 1); + return worldCenter_B - worldCenter_A; + } + + // Get Corners + std::vector<vec3> getCorners(const mat4 &worldFromObj) + { + const vec3 worldCenter = worldFromObj * vec4(0, 0, 0, 1); + vec3 worldEdges[3]; + for (size_t i = 0; i < 3; ++i) + { + vec3 objEdge = vec3(0.0); + objEdge[i] = 0.5f; + worldEdges[i] = worldFromObj * vec4(objEdge, 0); + } + std::vector<vec3> results; + results.push_back(worldCenter - worldEdges[0] - worldEdges[1] - worldEdges[2]); + results.push_back(worldCenter + worldEdges[0] - worldEdges[1] - worldEdges[2]); + results.push_back(worldCenter - worldEdges[0] + worldEdges[1] - worldEdges[2]); + results.push_back(worldCenter + worldEdges[0] + worldEdges[1] - worldEdges[2]); // this +,+,- + results.push_back(worldCenter - worldEdges[0] - worldEdges[1] + worldEdges[2]); + results.push_back(worldCenter + worldEdges[0] - worldEdges[1] + worldEdges[2]); // this +,-,+ + results.push_back(worldCenter - worldEdges[0] + worldEdges[1] + worldEdges[2]); // this -,+,+ + results.push_back(worldCenter + worldEdges[0] + worldEdges[1] + worldEdges[2]); // this +,+,+ + return results; + } + + // Get Rigid Box Size + vec3 getBoxSize(const mat4 &worldFromObj) + { + vec3 size = vec3(0.0); + vec3 edges[3]; + for (size_t i = 0; i < 3; ++i) + { + vec3 objEdge = vec3(0.0); + objEdge[i] = 0.5f; + edges[i] = worldFromObj * vec4(objEdge, 0); + size[i] = 2.0f * glm::length(edges[i]); + } + return size; + } + + // Get the Normal to the faces + std::vector<vec3> getAxisNormalToFaces(const mat4 &worldFromObj) + { + std::vector<vec3> edges; + vec4 objX = vec4(1, 0, 0, 0); + vec4 objY = vec4(0, 1, 0, 0); + vec4 objZ = vec4(0, 0, 1, 0); + vec3 edge1 = glm::normalize(worldFromObj * objX); + vec3 edge2 = glm::normalize(worldFromObj * objY); + vec3 edge3 = glm::normalize(worldFromObj * objZ); + std::vector<vec3> results; + edges.push_back(edge1); + edges.push_back(edge2); + edges.push_back(edge3); + return edges; + } + + // Get the pair of edges + std::vector<vec3> getPairOfEdges(const mat4 &worldFromObj_A, const mat4 &worldFromObj_B) + { + std::vector<vec3> worldEdges1 = getAxisNormalToFaces(worldFromObj_A); + std::vector<vec3> worldEdges2 = getAxisNormalToFaces(worldFromObj_B); + + std::vector<vec3> results; + for (int i = 0; i < worldEdges1.size(); i++) + { + for (int j = 0; j < worldEdges2.size(); j++) + { + vec3 vector = glm::cross(worldEdges1[i], worldEdges2[j]); + if (glm::length(vector) > 0) + results.push_back(glm::normalize(vector)); + } + } + return results; + } + + // project a shape on an axis + Projection project(const mat4 &worldFromObj, vec3 axis) + { + // Get corners + std::vector<vec3> worldCorners = getCorners(worldFromObj); + float min = glm::dot(worldCorners[0], axis); + float max = min; + for (int i = 1; i < worldCorners.size(); i++) + { + float p = glm::dot(worldCorners[i], axis); + if (p < min) + { + min = p; + } + else if (p > max) + { + max = p; + } + } + Projection p; + p.max = max; + p.min = min; + return p; + } + + bool overlap(Projection p1, Projection p2) + { + return !((p1.max > p2.max && p1.min > p2.max) || (p2.max > p1.max && p2.min > p1.max)); + } + + float getOverlap(Projection p1, Projection p2) + { + return glm::min(p1.max, p2.max) - glm::max(p1.min, p2.min); + } + + static vec3 contactPoint( + const vec3 &pOne, + const vec3 &dOne, + float oneSize, + const vec3 &pTwo, + const vec3 &dTwo, + float twoSize, + + // If this is true, and the contact point is outside + // the edge (in the case of an edge-face contact) then + // we use one's midpoint, otherwise we use two's. + bool useOne) + { + vec3 cOne, cTwo; + + float smOne = glm::length2(dOne); + float smTwo = glm::length2(dTwo); + float dpOneTwo = glm::dot(dTwo, dOne); + + vec3 toSt = pOne - pTwo; + float dpStaOne = glm::dot(dOne, toSt); + float dpStaTwo = glm::dot(dTwo, toSt); + + float denom = smOne * smTwo - dpOneTwo * dpOneTwo; + + // Zero denominator indicates parrallel lines + if (abs(denom) < 0.0001f) + { + return useOne ? pOne : pTwo; + } + + float mua = (dpOneTwo * dpStaTwo - smTwo * dpStaOne) / denom; + float mub = (smOne * dpStaTwo - dpOneTwo * dpStaOne) / denom; + + // If either of the edges has the nearest point out + // of bounds, then the edges aren't crossed, we have + // an edge-face contact. Our point is on the edge, which + // we know from the useOne parameter. + if (mua > oneSize || + mua < -oneSize || + mub > twoSize || + mub < -twoSize) + { + return useOne ? pOne : pTwo; + } + else + { + cOne = pOne + dOne * mua; + cTwo = pTwo + dTwo * mub; + + return cOne * 0.5f + cTwo * 0.5f; + } + } + + vec3 handleVertexToface(const mat4 &worldFromObj, const vec3 &toCenter) + { + std::vector<vec3> corners = getCorners(worldFromObj); + float min = 1000; + vec3 vertex; + for (int i = 0; i < corners.size(); i++) + { + float value = glm::dot(corners[i], toCenter); + if (value < min) + { + vertex = corners[i]; + min = value; + } + } + + return vertex; + } + + CollisionInfo checkCollisionSATHelper(const mat4 &worldFromObj_A, const mat4 &worldFromObj_B, vec3 size_A, vec3 size_B) + { + CollisionInfo info; + info.isColliding = false; + vec3 collisionPoint = vec3(0.0); + float smallOverlap = 10000.0f; + vec3 axis; + int index; + int fromWhere = -1; + bool bestSingleAxis = false; + vec3 toCenter = getVectorConnnectingCenters(worldFromObj_A, worldFromObj_B); + std::vector<vec3> axes1 = getAxisNormalToFaces(worldFromObj_A); + std::vector<vec3> axes2 = getAxisNormalToFaces(worldFromObj_B); + std::vector<vec3> axes3 = getPairOfEdges(worldFromObj_A, worldFromObj_B); + // loop over the axes1 + for (int i = 0; i < axes1.size(); i++) + { + // project both shapes onto the axis + Projection p1 = project(worldFromObj_A, axes1[i]); + Projection p2 = project(worldFromObj_B, axes1[i]); + // do the projections overlap? + if (!overlap(p1, p2)) + { + // then we can guarantee that the shapes do not overlap + return info; + } + else + { + // get the overlap + float o = getOverlap(p1, p2); + // check for minimum + if (o < smallOverlap) + { + // then set this one as the smallest + smallOverlap = o; + axis = axes1[i]; + index = i; + fromWhere = 0; + } + } + } + // loop over the axes2 + for (int i = 0; i < axes2.size(); i++) + { + // project both shapes onto the axis + Projection p1 = project(worldFromObj_A, axes2[i]); + Projection p2 = project(worldFromObj_B, axes2[i]); + // do the projections overlap? + if (!overlap(p1, p2)) + { + // then we can guarantee that the shapes do not overlap + return info; + } + else + { + // get the overlap + float o = getOverlap(p1, p2); + // check for minimum + if (o < smallOverlap) + { + // then set this one as the smallest + smallOverlap = o; + axis = axes2[i]; + index = i; + fromWhere = 1; + bestSingleAxis = true; + } + } + } + int whichEdges = 0; + // loop over the axes3 + for (int i = 0; i < axes3.size(); i++) + { + // project both shapes onto the axis + Projection p1 = project(worldFromObj_A, axes3[i]); + Projection p2 = project(worldFromObj_B, axes3[i]); + // do the projections overlap? + if (!overlap(p1, p2)) + { + // then we can guarantee that the shapes do not overlap + return info; + } + else + { + // get the overlap + float o = getOverlap(p1, p2); + // check for minimum + if (o < smallOverlap) + { + // then set this one as the smallest + smallOverlap = o; + axis = axes3[i]; + index = i; + whichEdges = i; + fromWhere = 2; + } + } + } + // if we get here then we know that every axis had overlap on it + // so we can guarantee an intersection + vec3 normal; + switch (fromWhere) + { + case 0: + { + normal = axis; + if (glm::dot(axis, toCenter) <= 0) + { + normal = -normal; + } + collisionPoint = handleVertexToface(worldFromObj_B, toCenter); + } + break; + case 1: + { + normal = axis; + if (glm::dot(axis, toCenter) <= 0) + { + normal = -normal; + } + collisionPoint = handleVertexToface(worldFromObj_A, toCenter * -1.0f); + } + break; + case 2: + { + vec3 axis = glm::normalize(glm::cross(axes1[whichEdges / 3], axes2[whichEdges % 3])); + normal = axis; + if (glm::dot(axis, toCenter) <= 0) + { + normal = -normal; + } + vec4 ptOnOneEdge = vec4(0.5, 0.5, 0.5, 1); + vec4 ptOnTwoEdge = vec4(0.5, 0.5, 0.5, 1); + + for (int i = 0; i < 3; i++) + { + if (i == whichEdges / 3) + ptOnOneEdge[i] = 0; + else if (glm::dot(axes1[i], normal) < 0) + ptOnOneEdge[i] = -ptOnOneEdge[i]; + + if (i == whichEdges % 3) + ptOnTwoEdge[i] = 0; + else if (glm::dot(axes2[i], normal) > 0) + ptOnTwoEdge[i] = -ptOnTwoEdge[i]; + } + ptOnOneEdge = worldFromObj_A * ptOnOneEdge; + ptOnTwoEdge = worldFromObj_B * ptOnTwoEdge; + collisionPoint = contactPoint(ptOnOneEdge, + axes1[whichEdges / 3], + size_A[whichEdges / 3], + ptOnTwoEdge, + axes2[whichEdges % 3], + size_B[whichEdges % 3], + bestSingleAxis); + } + break; + } + + info.isColliding = true; + info.collisionPointWorld = collisionPoint; + info.depth = smallOverlap; + info.normalWorld = -normal; + return info; + } + + CollisionInfo checkCollisionSAT(glm::mat4 &worldFromObj_A, glm::mat4 &worldFromObj_B) + { + using namespace collisionTools; + vec3 calSizeA = getBoxSize(worldFromObj_A); + vec3 calSizeB = getBoxSize(worldFromObj_B); + + return checkCollisionSATHelper(worldFromObj_A, worldFromObj_B, calSizeA, calSizeB); + } + + // example of using the checkCollisionSAT function + void testCheckCollision(int caseid) + { + + if (caseid == 1) + { // simple examples, suppose that boxes A and B are cubes and have no rotation + glm::mat4 AM = glm::translate(glm::mat4(1.0), glm::vec3(1.0, 1.0, 1.0)); // box A at (1.0,1.0,1.0) + glm::mat4 BM = glm::translate(glm::mat4(1.0), glm::vec3(2.0, 2.0, 2.0)); // box B at (2.0,2.0,2.0) + + // check for collision + CollisionInfo simpletest = checkCollisionSAT(AM, BM); // should find out a collision here + if (!simpletest.isColliding) + std::cout << "No Collision" << std::endl; + else + { + std::cout << "collision detected at normal: " << simpletest.normalWorld.x << ", " << simpletest.normalWorld.y << ", " << simpletest.normalWorld.z << std::endl; + std::cout << "collision point : " << simpletest.collisionPointWorld.x << ", " << simpletest.collisionPointWorld.y << ", " << simpletest.collisionPointWorld.z << std::endl; + } + // case 1 result: + // collision detected at normal: -1.000000, -0.000000, -0.000000 + // collision point : 1.500000, 1.500000, 1.500000 + // Box A should be pushed to the left + } + else if (caseid == 2) + { // case 2, collide at a corner of Box B: + mat4 scale_A = glm::scale(mat4(1.0), vec3(9.0, 2.0, 3.0)); + mat4 translate_A = glm::translate(mat4(1.0), vec3(0.2, 5.0, 1.0)); + mat4 AM = translate_A * scale_A; + + float s = 5.656855f; + mat4 scale_B = glm::scale(mat4(1.0), vec3(s, s, 2.0)); + mat4 rotate_B = glm::rotate(mat4(1.0), glm::radians(45.0f), vec3(0, 0, 1)); + mat4 BM = rotate_B * scale_B; + + // check for collision + CollisionInfo simpletest = checkCollisionSAT(AM, BM); // should find out a collision here + + if (!simpletest.isColliding) + std::cout << "No Collision" << std::endl; + else + { + std::cout << "collision detected at normal: " << simpletest.normalWorld.x << ", " << simpletest.normalWorld.y << ", " << simpletest.normalWorld.z << std::endl; + std::cout << "collision point : " << simpletest.collisionPointWorld.x << ", " << simpletest.collisionPointWorld.y << ", " << simpletest.collisionPointWorld.z << std::endl; + } + // case 2 result: + // collision detected at normal : 0.000000, 1.000000, 0.000000 + // collision point : 0.000000, 4.000000, 1.000000 + } + else if (caseid == 3) + { // case 3, collide at a corner of Box A: + + mat4 scale_A = glm::scale(mat4(1.0), vec3(2.829f, 2.829f, 2.0f)); + mat4 rotate_A = glm::rotate(mat4(1.0), glm::radians(45.0f), vec3(0, 0, 1)); + mat4 translate_A = glm::translate(mat4(1.0), vec3(-2.0, 0.0, 1.0)); + mat4 AM = translate_A * rotate_A * scale_A; + + mat4 scale_B = glm::scale(mat4(1.0), vec3(9.0f, 2.0f, 4.0f)); + mat4 rotate_B = glm::rotate(mat4(1.0), glm::radians(90.0f), vec3(0, 0, 1)); + mat4 translate_B = glm::translate(mat4(1.0), vec3(1.0, 0.5, 0.0)); + mat4 BM = translate_B * rotate_B * scale_B; + + // check for collision + CollisionInfo simpletest = checkCollisionSAT(AM, BM); // should find out a collision here + + if (!simpletest.isColliding) + std::cout << "No Collision" << std::endl; + else + { + std::cout << "collision detected at normal: " << simpletest.normalWorld.x << ", " << simpletest.normalWorld.y << ", " << simpletest.normalWorld.z << std::endl; + std::cout << "collision point : " << simpletest.collisionPointWorld.x << ", " << simpletest.collisionPointWorld.y << ", " << simpletest.collisionPointWorld.z << std::endl; + } + // case 3 result: + // collision detected at normal: -1.000000, 0.000000, -0.000000 + // collision point : 0.000405, 0.000000, 0.000000 + } + } +} \ No newline at end of file diff --git a/src/util/CollisionDetection.h b/src/util/CollisionDetection.h new file mode 100644 index 0000000..4ce120f --- /dev/null +++ b/src/util/CollisionDetection.h @@ -0,0 +1,62 @@ +#pragma once +#include <vector> +#include <glm/glm.hpp> +#include <glm/gtx/norm.hpp> +#include <iostream> +#include <util/CollisionInfo.h> + +// tool data structures/functions called by the collision detection method, you can ignore the details here +namespace collisionTools +{ + + struct Projection + { + float min, max; + }; + + glm::vec3 getVectorConnnectingCenters(const glm::mat4 &worldFromObj_A, const glm::mat4 &worldFromObj_B); + // Get Corners + std::vector<glm::vec3> getCorners(const glm::mat4 &worldFromObj); + + // Get Rigid Box Size + glm::vec3 getBoxSize(const glm::mat4 &worldFromObj); + + // Get the Normal to the faces + std::vector<glm::vec3> getAxisNormalToFaces(const glm::mat4 &worldFromObj); + + // Get the pair of edges + std::vector<glm::vec3> getPairOfEdges(const glm::mat4 &worldFromObj_A, const glm::mat4 &worldFromObj_B); + + // project a shape on an axis + Projection project(const glm::mat4 &worldFromObj, glm::vec3 axis); + + bool overlap(Projection p1, Projection p2); + + float getOverlap(Projection p1, Projection p2); + + static glm::vec3 contactPoint( + const glm::vec3 &pOne, + const glm::vec3 &dOne, + float oneSize, + const glm::vec3 &pTwo, + const glm::vec3 &dTwo, + float twoSize, + + // If this is true, and the contact point is outside + // the edge (in the case of an edge-face contact) then + // we use one's midpoint, otherwise we use two's. + bool useOne); + + glm::vec3 handleVertexToface(const glm::mat4 &worldFromObj, const glm::vec3 &toCenter); + + CollisionInfo checkCollisionSATHelper(const glm::mat4 &worldFromObj_A, const glm::mat4 &worldFromObj_B, glm::vec3 size_A, glm::vec3 size_B); + + /* params: + obj2World_A, the transfer matrix from object space of A to the world space + obj2World_B, the transfer matrix from object space of B to the world space + */ + CollisionInfo checkCollisionSAT(glm::mat4 &worldFromObj_A, glm::mat4 &worldFromObj_B); + + // example of using the checkCollisionSAT function + void testCheckCollision(int caseid); +} \ No newline at end of file diff --git a/src/util/CollisionInfo.h b/src/util/CollisionInfo.h new file mode 100644 index 0000000..4fd97cb --- /dev/null +++ b/src/util/CollisionInfo.h @@ -0,0 +1,13 @@ +#pragma once +#include <glm/glm.hpp> + +// the return structure, with these values, you should be able to calculate the impulse +// the depth shouldn't be used in your impulse calculation, it is a redundant value +// if the normalWorld == XMVectorZero(), no collision +struct CollisionInfo +{ + bool isColliding; // whether there is a collision point, true for yes + glm::vec3 collisionPointWorld; // the position of the collision point in world space + glm::vec3 normalWorld; // the direction of the impulse to A, negative of the collision face of A + float depth; // the distance of the collision point to the surface, not necessary. +}; \ No newline at end of file diff --git a/src/util/pcgsolver.h b/src/util/pcgsolver.h new file mode 100644 index 0000000..0d3454d --- /dev/null +++ b/src/util/pcgsolver.h @@ -0,0 +1,871 @@ +// +// Preconditioned conjugate gradient solver +// +// Created by Robert Bridson, Ryoichi Ando and Nils Thuerey +// + +#ifndef RCMATRIX3_H +#define RCMATRIX3_H + +#include <iterator> +#include <cassert> +#include <vector> +#include <fstream> +#include <cmath> +#include <functional> + +// index type +#define int_index long long + +// parallelization disabled + +#define parallel_for(size) \ + { \ + int thread_number = 0; \ + int_index parallel_index = 0; \ + for (int_index parallel_index = 0; parallel_index < (int_index)size; parallel_index++) \ + { +#define parallel_end \ + } \ + thread_number = parallel_index = 0; \ + } + +#define parallel_block +#define do_parallel +#define do_end +#define block_end + +// note - "Int" instead of "N" here, the latter is size! +template <class Int, class T> +struct InstantBLAS +{ + static inline Int offset(Int N, Int incX) { return ((incX) > 0 ? 0 : ((N)-1) * (-(incX))); } + static T cblas_ddot(const Int N, const T *X, const Int incX, const T *Y, const Int incY) + { + double r = 0.0; // always use double precision internally here... + Int i; + Int ix = offset(N, incX); + Int iy = offset(N, incY); + for (i = 0; i < N; i++) + { + r += X[ix] * Y[iy]; + ix += incX; + iy += incY; + } + return (T)r; + } + static void cblas_daxpy(const Int N, const T alpha, const T *X, const Int incX, T *Y, const Int incY) + { + Int i; + if (N <= 0) + return; + if (alpha == 0.0) + return; + if (incX == 1 && incY == 1) + { + const Int m = N % 4; + for (i = 0; i < m; i++) + Y[i] += alpha * X[i]; + for (i = m; i + 3 < N; i += 4) + { + Y[i] += alpha * X[i]; + Y[i + 1] += alpha * X[i + 1]; + Y[i + 2] += alpha * X[i + 2]; + Y[i + 3] += alpha * X[i + 3]; + } + } + else + { + Int ix = offset(N, incX); + Int iy = offset(N, incY); + for (i = 0; i < N; i++) + { + Y[iy] += alpha * X[ix]; + ix += incX; + iy += incY; + } + } + } + // dot products ============================================================== + static inline T dot(const std::vector<T> &x, const std::vector<T> &y) + { + return cblas_ddot((int)x.size(), &x[0], 1, &y[0], 1); + } + + // inf-norm (maximum absolute value: index of max returned) ================== + static inline Int index_abs_max(const std::vector<T> &x) + { + int maxind = 0; + T maxvalue = 0; + for (Int i = 0; i < (Int)x.size(); ++i) + { + if (std::abs(x[i]) > maxvalue) + { + maxvalue = fabs(x[i]); + maxind = i; + } + } + return maxind; + } + + // inf-norm (maximum absolute value) ========================================= + // technically not part of BLAS, but useful + static inline T abs_max(const std::vector<T> &x) + { + return std::abs(x[index_abs_max(x)]); + } + + // saxpy (y=alpha*x+y) ======================================================= + static inline void add_scaled(T alpha, const std::vector<T> &x, std::vector<T> &y) + { + cblas_daxpy((Int)x.size(), alpha, &x[0], 1, &y[0], 1); + } +}; + +template <class T> +void zero(std::vector<T> &v) +{ + for (int i = (int)v.size() - 1; i >= 0; --i) + v[i] = 0; +} + +template <class T> +void insert(std::vector<T> &a, unsigned int index, T e) +{ + a.push_back(a.back()); + for (unsigned int i = (unsigned int)a.size() - 1; i > index; --i) + a[i] = a[i - 1]; + a[index] = e; +} + +template <class T> +void erase(std::vector<T> &a, unsigned int index) +{ + for (unsigned int i = index; i < a.size() - 1; ++i) + a[i] = a[i + 1]; + a.pop_back(); +} + +//============================================================================ +// Dynamic compressed sparse row matrix. + +template <class T> +struct SparseMatrix +{ + int n; // dimension + std::vector<std::vector<int>> index; // for each row, a list of all column indices (sorted) + std::vector<std::vector<T>> value; // values corresponding to index + + explicit SparseMatrix(int n_ = 0, int expected_nonzeros_per_row = 7) + : n(n_), index(n_), value(n_) + { + for (int i = 0; i < n; ++i) + { + index[i].reserve(expected_nonzeros_per_row); + value[i].reserve(expected_nonzeros_per_row); + } + } + + void clear(void) + { + n = 0; + index.clear(); + value.clear(); + } + + void zero(void) + { + for (int i = 0; i < n; ++i) + { + index[i].resize(0); + value[i].resize(0); + } + } + + void resize(int n_) + { + n = n_; + index.resize(n); + value.resize(n); + } + + T operator()(int i, int j) const + { + for (int k = 0; k < (int)index[i].size(); ++k) + { + if (index[i][k] == j) + return value[i][k]; + else if (index[i][k] > j) + return 0; + } + return 0; + } + + void set_element(int i, int j, T new_value) + { + int k = 0; + for (; k < (int)index[i].size(); ++k) + { + if (index[i][k] == j) + { + value[i][k] = new_value; + return; + } + else if (index[i][k] > j) + { + insert(index[i], k, j); + insert(value[i], k, new_value); + return; + } + } + index[i].push_back(j); + value[i].push_back(new_value); + } + + void add_to_element(int i, int j, T increment_value) + { + int k = 0; + for (; k < (int)index[i].size(); ++k) + { + if (index[i][k] == j) + { + value[i][k] += increment_value; + return; + } + else if (index[i][k] > j) + { + insert(index[i], k, j); + insert(value[i], k, increment_value); + return; + } + } + index[i].push_back(j); + value[i].push_back(increment_value); + } + + // assumes indices is already sorted + void add_sparse_row(int i, const std::vector<int> &indices, const std::vector<T> &values) + { + int j = 0, k = 0; + while (j < indices.size() && k < (int)index[i].size()) + { + if (index[i][k] < indices[j]) + { + ++k; + } + else if (index[i][k] > indices[j]) + { + insert(index[i], k, indices[j]); + insert(value[i], k, values[j]); + ++j; + } + else + { + value[i][k] += values[j]; + ++j; + ++k; + } + } + for (; j < indices.size(); ++j) + { + index[i].push_back(indices[j]); + value[i].push_back(values[j]); + } + } + + // assumes matrix has symmetric structure - so the indices in row i tell us which columns to delete i from + void symmetric_remove_row_and_column(int i) + { + for (int a = 0; a < index[i].size(); ++a) + { + int j = index[i][a]; // + for (int b = 0; b < index[j].size(); ++b) + { + if (index[j][b] == i) + { + erase(index[j], b); + erase(value[j], b); + break; + } + } + } + index[i].resize(0); + value[i].resize(0); + } + + void write_matlab(std::ostream &output, const char *variable_name) + { + output << variable_name << "=sparse(["; + for (int i = 0; i < n; ++i) + { + for (int j = 0; j < index[i].size(); ++j) + { + output << i + 1 << " "; + } + } + output << "],...\n ["; + for (int i = 0; i < n; ++i) + { + for (int j = 0; j < index[i].size(); ++j) + { + output << index[i][j] + 1 << " "; + } + } + output << "],...\n ["; + for (int i = 0; i < n; ++i) + { + for (int j = 0; j < value[i].size(); ++j) + { + output << value[i][j] << " "; + } + } + output << "], " << n << ", " << n << ");" << std::endl; + } +}; + +typedef SparseMatrix<float> SparseMatrixf; +typedef SparseMatrix<double> SparseMatrixd; + +// perform result=matrix*x +template <class T> +void multiply(const SparseMatrix<T> &matrix, const std::vector<T> &x, std::vector<T> &result) +{ + assert(matrix.n == x.size()); + result.resize(matrix.n); + // for(int i=0; i<matrix.n; ++i) + parallel_for(matrix.n) + { + unsigned i(parallel_index); + T value = 0; + for (int j = 0; j < (int)matrix.index[i].size(); ++j) + { + value += matrix.value[i][j] * x[matrix.index[i][j]]; + } + result[i] = value; + } + parallel_end +} + +// perform result=result-matrix*x +template <class T> +void multiply_and_subtract(const SparseMatrix<T> &matrix, const std::vector<T> &x, std::vector<T> &result) +{ + assert(matrix.n == x.size()); + result.resize(matrix.n); + for (int i = 0; i < (int)matrix.n; ++i) + { + for (int j = 0; j < (int)matrix.index[i].size(); ++j) + { + result[i] -= matrix.value[i][j] * x[matrix.index[i][j]]; + } + } +} + +//============================================================================ +// Fixed version of SparseMatrix. This is not a good structure for dynamically +// modifying the matrix, but can be significantly faster for matrix-vector +// multiplies due to better data locality. + +template <class T> +struct FixedSparseMatrix +{ + int n; // dimension + std::vector<T> value; // nonzero values row by row + std::vector<int> colindex; // corresponding column indices + std::vector<int> rowstart; // where each row starts in value and colindex (and last entry is one past the end, the number of nonzeros) + + explicit FixedSparseMatrix(int n_ = 0) + : n(n_), value(0), colindex(0), rowstart(n_ + 1) + { + } + + void clear(void) + { + n = 0; + value.clear(); + colindex.clear(); + rowstart.clear(); + } + + void resize(int n_) + { + n = n_; + rowstart.resize(n + 1); + } + + void construct_from_matrix(const SparseMatrix<T> &matrix) + { + resize(matrix.n); + rowstart[0] = 0; + for (int i = 0; i < n; ++i) + { + rowstart[i + 1] = rowstart[i] + matrix.index[i].size(); + } + value.resize(rowstart[n]); + colindex.resize(rowstart[n]); + int j = 0; + for (int i = 0; i < n; ++i) + { + for (int k = 0; k < (int)matrix.index[i].size(); ++k) + { + value[j] = matrix.value[i][k]; + colindex[j] = matrix.index[i][k]; + ++j; + } + } + } + + void write_matlab(std::ostream &output, const char *variable_name) + { + output << variable_name << "=sparse(["; + for (int i = 0; i < n; ++i) + { + for (int j = rowstart[i]; j < rowstart[i + 1]; ++j) + { + output << i + 1 << " "; + } + } + output << "],...\n ["; + for (int i = 0; i < n; ++i) + { + for (int j = rowstart[i]; j < rowstart[i + 1]; ++j) + { + output << colindex[j] + 1 << " "; + } + } + output << "],...\n ["; + for (int i = 0; i < n; ++i) + { + for (int j = rowstart[i]; j < rowstart[i + 1]; ++j) + { + output << value[j] << " "; + } + } + output << "], " << n << ", " << n << ");" << std::endl; + } +}; + +// perform result=matrix*x +template <class T> +void multiply(const FixedSparseMatrix<T> &matrix, const std::vector<T> &x, std::vector<T> &result) +{ + assert(matrix.n == x.size()); + result.resize(matrix.n); + // for(int i=0; i<matrix.n; ++i) + parallel_for(matrix.n) + { + unsigned i(parallel_index); + T value = 0; + for (int j = matrix.rowstart[i]; j < matrix.rowstart[i + 1]; ++j) + { + value += matrix.value[j] * x[matrix.colindex[j]]; + } + result[i] = value; + } + parallel_end +} + +// perform result=result-matrix*x +template <class T> +void multiply_and_subtract(const FixedSparseMatrix<T> &matrix, const std::vector<T> &x, std::vector<T> &result) +{ + assert(matrix.n == x.size()); + result.resize(matrix.n); + for (int i = 0; i < matrix.n; ++i) + { + for (int j = matrix.rowstart[i]; j < matrix.rowstart[i + 1]; ++j) + { + result[i] -= matrix.value[j] * x[matrix.colindex[j]]; + } + } +} + +//============================================================================ +// A simple compressed sparse column data structure (with separate diagonal) +// for lower triangular matrices + +template <class T> +struct SparseColumnLowerFactor +{ + int n; + std::vector<T> invdiag; // reciprocals of diagonal elements + std::vector<T> value; // values below the diagonal, listed column by column + std::vector<int> rowindex; // a list of all row indices, for each column in turn + std::vector<int> colstart; // where each column begins in rowindex (plus an extra entry at the end, of #nonzeros) + std::vector<T> adiag; // just used in factorization: minimum "safe" diagonal entry allowed + + explicit SparseColumnLowerFactor(int n_ = 0) + : n(n_), invdiag(n_), colstart(n_ + 1), adiag(n_) + { + } + + void clear(void) + { + n = 0; + invdiag.clear(); + value.clear(); + rowindex.clear(); + colstart.clear(); + adiag.clear(); + } + + void resize(int n_) + { + n = n_; + invdiag.resize(n); + colstart.resize(n + 1); + adiag.resize(n); + } + + void write_matlab(std::ostream &output, const char *variable_name) + { + output << variable_name << "=sparse(["; + for (int i = 0; i < n; ++i) + { + output << " " << i + 1; + for (int j = colstart[i]; j < colstart[i + 1]; ++j) + { + output << " " << rowindex[j] + 1; + } + } + output << "],...\n ["; + for (int i = 0; i < n; ++i) + { + output << " " << i + 1; + for (int j = colstart[i]; j < colstart[i + 1]; ++j) + { + output << " " << i + 1; + } + } + output << "],...\n ["; + for (int i = 0; i < n; ++i) + { + output << " " << (invdiag[i] != 0 ? 1 / invdiag[i] : 0); + for (int j = colstart[i]; j < colstart[i + 1]; ++j) + { + output << " " << value[j]; + } + } + output << "], " << n << ", " << n << ");" << std::endl; + } +}; + +//============================================================================ +// Incomplete Cholesky factorization, level zero, with option for modified version. +// Set modification_parameter between zero (regular incomplete Cholesky) and +// one (fully modified version), with values close to one usually giving the best +// results. The min_diagonal_ratio parameter is used to detect and correct +// problems in factorization: if a pivot is this much less than the diagonal +// entry from the original matrix, the original matrix entry is used instead. + +template <class T> +void factor_modified_incomplete_cholesky0(const SparseMatrix<T> &matrix, SparseColumnLowerFactor<T> &factor, + T modification_parameter = 0.97, T min_diagonal_ratio = 0.25) +{ + // first copy lower triangle of matrix into factor (Note: assuming A is symmetric of course!) + factor.resize(matrix.n); + zero(factor.invdiag); // important: eliminate old values from previous solves! + factor.value.resize(0); + factor.rowindex.resize(0); + zero(factor.adiag); + for (int i = 0; i < matrix.n; ++i) + { + factor.colstart[i] = (int)factor.rowindex.size(); + for (int j = 0; j < (int)matrix.index[i].size(); ++j) + { + if (matrix.index[i][j] > i) + { + factor.rowindex.push_back(matrix.index[i][j]); + factor.value.push_back(matrix.value[i][j]); + } + else if (matrix.index[i][j] == i) + { + factor.invdiag[i] = factor.adiag[i] = matrix.value[i][j]; + } + } + } + factor.colstart[matrix.n] = (int)factor.rowindex.size(); + // now do the incomplete factorization (figure out numerical values) + + // MATLAB code: + // L=tril(A); + // for k=1:size(L,2) + // L(k,k)=sqrt(L(k,k)); + // L(k+1:end,k)=L(k+1:end,k)/L(k,k); + // for j=find(L(:,k))' + // if j>k + // fullupdate=L(:,k)*L(j,k); + // incompleteupdate=fullupdate.*(A(:,j)~=0); + // missing=sum(fullupdate-incompleteupdate); + // L(j:end,j)=L(j:end,j)-incompleteupdate(j:end); + // L(j,j)=L(j,j)-omega*missing; + // end + // end + // end + + for (int k = 0; k < matrix.n; ++k) + { + if (factor.adiag[k] == 0) + continue; // null row/column + // figure out the final L(k,k) entry + if (factor.invdiag[k] < min_diagonal_ratio * factor.adiag[k]) + factor.invdiag[k] = 1 / sqrt(factor.adiag[k]); // drop to Gauss-Seidel here if the pivot looks dangerously small + else + factor.invdiag[k] = 1 / sqrt(factor.invdiag[k]); + // finalize the k'th column L(:,k) + for (int p = factor.colstart[k]; p < factor.colstart[k + 1]; ++p) + { + factor.value[p] *= factor.invdiag[k]; + } + // incompletely eliminate L(:,k) from future columns, modifying diagonals + for (int p = factor.colstart[k]; p < factor.colstart[k + 1]; ++p) + { + int j = factor.rowindex[p]; // work on column j + T multiplier = factor.value[p]; + T missing = 0; + int a = factor.colstart[k]; + // first look for contributions to missing from dropped entries above the diagonal in column j + int b = 0; + while (a < factor.colstart[k + 1] && factor.rowindex[a] < j) + { + // look for factor.rowindex[a] in matrix.index[j] starting at b + while (b < (int)matrix.index[j].size()) + { + if (matrix.index[j][b] < factor.rowindex[a]) + ++b; + else if (matrix.index[j][b] == factor.rowindex[a]) + break; + else + { + missing += factor.value[a]; + break; + } + } + ++a; + } + // adjust the diagonal j,j entry + if (a < factor.colstart[k + 1] && factor.rowindex[a] == j) + { + factor.invdiag[j] -= multiplier * factor.value[a]; + } + ++a; + // and now eliminate from the nonzero entries below the diagonal in column j (or add to missing if we can't) + b = factor.colstart[j]; + while (a < factor.colstart[k + 1] && b < factor.colstart[j + 1]) + { + if (factor.rowindex[b] < factor.rowindex[a]) + ++b; + else if (factor.rowindex[b] == factor.rowindex[a]) + { + factor.value[b] -= multiplier * factor.value[a]; + ++a; + ++b; + } + else + { + missing += factor.value[a]; + ++a; + } + } + // and if there's anything left to do, add it to missing + while (a < factor.colstart[k + 1]) + { + missing += factor.value[a]; + ++a; + } + // and do the final diagonal adjustment from the missing entries + factor.invdiag[j] -= modification_parameter * multiplier * missing; + } + } +} + +//============================================================================ +// Solution routines with lower triangular matrix. + +// solve L*result=rhs +template <class T> +void solve_lower(const SparseColumnLowerFactor<T> &factor, const std::vector<T> &rhs, std::vector<T> &result) +{ + assert(factor.n == rhs.size()); + assert(factor.n == result.size()); + result = rhs; + for (int i = 0; i < factor.n; ++i) + { + result[i] *= factor.invdiag[i]; + for (int j = factor.colstart[i]; j < factor.colstart[i + 1]; ++j) + { + result[factor.rowindex[j]] -= factor.value[j] * result[i]; + } + } +} + +// solve L^T*result=rhs +template <class T> +void solve_lower_transpose_in_place(const SparseColumnLowerFactor<T> &factor, std::vector<T> &x) +{ + assert(factor.n == (int)x.size()); + assert(factor.n > 0); + int i = factor.n; + do + { + --i; + for (int j = factor.colstart[i]; j < factor.colstart[i + 1]; ++j) + { + x[i] -= factor.value[j] * x[factor.rowindex[j]]; + } + x[i] *= factor.invdiag[i]; + } while (i != 0); +} + +//============================================================================ +// Encapsulates the Conjugate Gradient algorithm with incomplete Cholesky +// factorization preconditioner. + +template <class T> +struct SparsePCGSolver +{ + SparsePCGSolver(void) + { + set_solver_parameters(1e-5, 100, 0.97, 0.25); + } + + void set_solver_parameters(T tolerance_factor_, int max_iterations_, T modified_incomplete_cholesky_parameter_ = 0.97, T min_diagonal_ratio_ = 0.25) + { + tolerance_factor = tolerance_factor_; + if (tolerance_factor < 1e-30) + tolerance_factor = 1e-30; + max_iterations = max_iterations_; + modified_incomplete_cholesky_parameter = modified_incomplete_cholesky_parameter_; + min_diagonal_ratio = min_diagonal_ratio_; + } + + bool solve(const SparseMatrix<T> &matrix, const std::vector<T> &rhs, std::vector<T> &result, T &relative_residual_out, int &iterations_out, int precondition = 2) + { + int n = matrix.n; + if ((int)m.size() != n) + { + m.resize(n); + s.resize(n); + z.resize(n); + r.resize(n); + } + zero(result); + r = rhs; + double residual_out = InstantBLAS<int, T>::abs_max(r); + if (residual_out == 0) + { + iterations_out = 0; + return true; + } + // double tol=tolerance_factor*residual_out; // relative residual + double tol = tolerance_factor; + double residual_0 = residual_out; + + form_preconditioner(matrix, precondition); + apply_preconditioner(r, z, precondition); + double rho = InstantBLAS<int, T>::dot(z, r); + if (rho == 0 || rho != rho) + { + iterations_out = 0; + return false; + } + + s = z; + fixed_matrix.construct_from_matrix(matrix); + int iteration; + for (iteration = 0; iteration < max_iterations; ++iteration) + { + multiply(fixed_matrix, s, z); + double alpha = rho / InstantBLAS<int, T>::dot(s, z); + InstantBLAS<int, T>::add_scaled(alpha, s, result); + InstantBLAS<int, T>::add_scaled(-alpha, z, r); + residual_out = InstantBLAS<int, T>::abs_max(r); + relative_residual_out = residual_out / residual_0; + if (residual_out <= tol) + { + iterations_out = iteration + 1; + return true; + } + apply_preconditioner(r, z, precondition); + double rho_new = InstantBLAS<int, T>::dot(z, r); + double beta = rho_new / rho; + InstantBLAS<int, T>::add_scaled(beta, s, z); + s.swap(z); // s=beta*s+z + rho = rho_new; + } + iterations_out = iteration; + relative_residual_out = residual_out / residual_0; + return false; + } + +protected: + // internal structures + SparseColumnLowerFactor<T> ic_factor; // modified incomplete cholesky factor + std::vector<T> m, z, s, r; // temporary vectors for PCG + FixedSparseMatrix<T> fixed_matrix; // used within loop + + // parameters + T tolerance_factor; + int max_iterations; + T modified_incomplete_cholesky_parameter; + T min_diagonal_ratio; + + void form_preconditioner(const SparseMatrix<T> &matrix, int precondition = 2) + { + if (precondition == 2) + { + // incomplete cholesky + factor_modified_incomplete_cholesky0(matrix, ic_factor, modified_incomplete_cholesky_parameter, min_diagonal_ratio); + } + else if (precondition == 1) + { + // diagonal + ic_factor.resize(matrix.n); + zero(ic_factor.invdiag); + for (int i = 0; i < matrix.n; ++i) + { + for (int j = 0; j < (int)matrix.index[i].size(); ++j) + { + if (matrix.index[i][j] == i) + { + ic_factor.invdiag[i] = 1. / matrix.value[i][j]; + } + } + } + } + } + + void apply_preconditioner(const std::vector<T> &x, std::vector<T> &result, int precondition = 2) + { + if (precondition == 2) + { + // incomplete cholesky + solve_lower(ic_factor, x, result); + solve_lower_transpose_in_place(ic_factor, result); + } + else if (precondition == 1) + { + // diagonal + for (int_index i = 0; i < (int_index)result.size(); ++i) + { + result[i] = x[i] * ic_factor.invdiag[i]; + } + } + else + { + // off + result = x; + } + } +}; + +#undef parallel_for +#undef parallel_end +#undef int_index + +#undef parallel_block +#undef do_parallel +#undef do_end +#undef block_end + +#endif \ No newline at end of file