diff --git a/CHANGES.rst b/CHANGES.rst index 8ae1420..f64ebab 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,6 +14,13 @@ Release Notes - Fix problem reported where angle of 180 degrees results in an error with the C version of the code. [#223] +- Use a different algorithm for square root in the ``qd`` library that + seems to be less prone to accuracy loss. This helps the bug in the + ``math_util.c`` due to which ``angle()`` returns a NaN for + coplanar vectors where the angle between the surface points should be + 180 degrees and enhances the solution from #223. Also, use ``qd`` epsilon + instead of arbitrary value for rounding error check. [#224] + 1.2.22 (04-January-2022) ======================== diff --git a/libqd/include/qd/c_dd.h b/libqd/include/qd/c_dd.h index 203a8fa..7ffcb01 100644 --- a/libqd/include/qd/c_dd.h +++ b/libqd/include/qd/c_dd.h @@ -90,6 +90,8 @@ void c_dd_comp(const double *a, const double *b, int *result); void c_dd_comp_dd_d(const double *a, double b, int *result); void c_dd_comp_d_dd(double a, const double *b, int *result); void c_dd_pi(double *a); +void c_dd_2pi(double *a); +double c_dd_epsilon(void); #ifdef __cplusplus } diff --git a/libqd/include/qd/c_qd.h b/libqd/include/qd/c_qd.h index 9062d1d..bbe5898 100644 --- a/libqd/include/qd/c_qd.h +++ b/libqd/include/qd/c_qd.h @@ -7,7 +7,7 @@ * * Copyright (c) 2000-2001 * - * Contains C wrapper function prototypes for quad-double precision + * Contains C wrapper function prototypes for quad-double precision * arithmetic. This can also be used from fortran code. */ #ifndef _QD_C_QD_H @@ -65,7 +65,7 @@ void c_qd_copy(const double *a, double *b); void c_qd_copy_dd(const double *a, double *b); void c_qd_copy_d(double a, double *b); -void c_qd_sqrt(const double *a, double *b); +int c_qd_sqrt(const double *a, double *b); void c_qd_sqr(const double *a, double *b); void c_qd_abs(const double *a, double *b); @@ -111,6 +111,8 @@ void c_qd_comp(const double *a, const double *b, int *result); void c_qd_comp_qd_d(const double *a, double b, int *result); void c_qd_comp_d_qd(double a, const double *b, int *result); void c_qd_pi(double *a); +void c_qd_2pi(double *a); +double c_qd_epsilon(void); #ifdef __cplusplus } diff --git a/libqd/include/qd/qd_real.h b/libqd/include/qd/qd_real.h index 32079d0..0149b0e 100644 --- a/libqd/include/qd/qd_real.h +++ b/libqd/include/qd/qd_real.h @@ -9,8 +9,8 @@ * * Quad-double precision (>= 212-bit significand) floating point arithmetic * package, written in ANSI C++, taking full advantage of operator overloading. - * Uses similar techniques as that of David Bailey's double-double package - * and that of Jonathan Shewchuk's adaptive precision floating point + * Uses similar techniques as that of David Bailey's double-double package + * and that of Jonathan Shewchuk's adaptive precision floating point * arithmetic package. See * * http://www.nersc.gov/~dhbailey/mpdist/mpdist.html @@ -120,16 +120,16 @@ struct QD_API qd_real { static qd_real rand(void); void to_digits(char *s, int &expn, int precision = _ndigits) const; - void write(char *s, int len, int precision = _ndigits, + void write(char *s, int len, int precision = _ndigits, bool showpos = false, bool uppercase = false) const; - std::string to_string(int precision = _ndigits, int width = 0, - std::ios_base::fmtflags fmt = static_cast(0), + std::string to_string(int precision = _ndigits, int width = 0, + std::ios_base::fmtflags fmt = static_cast(0), bool showpos = false, bool uppercase = false, char fill = ' ') const; static int read(const char *s, qd_real &a); /* Debugging methods */ void dump(const std::string &name = "", std::ostream &os = std::cerr) const; - void dump_bits(const std::string &name = "", + void dump_bits(const std::string &name = "", std::ostream &os = std::cerr) const; static qd_real debug_rand(); @@ -150,7 +150,7 @@ namespace std { } QD_API qd_real polyeval(const qd_real *c, int n, const qd_real &x); -QD_API qd_real polyroot(const qd_real *c, int n, +QD_API qd_real polyroot(const qd_real *c, int n, const qd_real &x0, int max_iter = 64, double thresh = 0.0); QD_API qd_real qdrand(void); @@ -190,6 +190,7 @@ QD_API qd_real operator/(double a, const qd_real &b); QD_API qd_real sqr(const qd_real &a); QD_API qd_real sqrt(const qd_real &a); +QD_API qd_real fsqrt(const qd_real &a, int &flag); QD_API qd_real pow(const qd_real &a, int n); QD_API qd_real pow(const qd_real &a, const qd_real &b); QD_API qd_real npwr(const qd_real &a, int n); diff --git a/libqd/src/c_dd.cpp b/libqd/src/c_dd.cpp index 1cb2989..1b93f59 100644 --- a/libqd/src/c_dd.cpp +++ b/libqd/src/c_dd.cpp @@ -283,7 +283,7 @@ void c_dd_comp(const double *a, const double *b, int *result) { *result = -1; else if (aa > bb) *result = 1; - else + else *result = 0; } @@ -293,7 +293,7 @@ void c_dd_comp_dd_d(const double *a, double b, int *result) { *result = -1; else if (aa > bb) *result = 1; - else + else *result = 0; } @@ -303,7 +303,7 @@ void c_dd_comp_d_dd(double a, const double *b, int *result) { *result = -1; else if (aa > bb) *result = 1; - else + else *result = 0; } @@ -311,4 +311,12 @@ void c_dd_pi(double *a) { TO_DOUBLE_PTR(dd_real::_pi, a); } +void c_dd_2pi(double *a) { + TO_DOUBLE_PTR(dd_real::_2pi, a); +} + +double c_dd_epsilon(void) { + return (double) std::numeric_limits::epsilon(); +} + } diff --git a/libqd/src/c_qd.cpp b/libqd/src/c_qd.cpp index 010cf85..77c05ec 100644 --- a/libqd/src/c_qd.cpp +++ b/libqd/src/c_qd.cpp @@ -237,11 +237,14 @@ void c_qd_copy_d(double a, double *b) { } -void c_qd_sqrt(const double *a, double *b) { +int c_qd_sqrt(const double *a, double *b) { + int flag; qd_real bb; - bb = sqrt(qd_real(a)); + bb = fsqrt(qd_real(a), flag); TO_DOUBLE_PTR(bb, b); + return flag; } + void c_qd_sqr(const double *a, double *b) { qd_real bb; bb = sqr(qd_real(a)); @@ -419,7 +422,7 @@ void c_qd_comp(const double *a, const double *b, int *result) { *result = -1; else if (aa > bb) *result = 1; - else + else *result = 0; } @@ -429,7 +432,7 @@ void c_qd_comp_qd_d(const double *a, double b, int *result) { *result = -1; else if (aa > b) *result = 1; - else + else *result = 0; } @@ -439,7 +442,7 @@ void c_qd_comp_d_qd(double a, const double *b, int *result) { *result = -1; else if (a > bb) *result = 1; - else + else *result = 0; } @@ -447,4 +450,13 @@ void c_qd_pi(double *a) { TO_DOUBLE_PTR(qd_real::_pi, a); } +void c_qd_2pi(double *a) { + TO_DOUBLE_PTR(qd_real::_2pi, a); +} + +double c_qd_epsilon(void) { + return (double) std::numeric_limits::epsilon(); +} + + } diff --git a/libqd/src/qd_real.cpp b/libqd/src/qd_real.cpp index 4863b53..67f14c3 100644 --- a/libqd/src/qd_real.cpp +++ b/libqd/src/qd_real.cpp @@ -62,7 +62,7 @@ qd_real nint(const qd_real &a) { if (x1 == a[1]) { /* Second double is already an integer. */ x2 = nint(a[2]); - + if (x2 == a[2]) { /* Third double is already an integer. */ x3 = nint(a[3]); @@ -84,7 +84,7 @@ qd_real nint(const qd_real &a) { x0 -= 1.0; } } - + renorm(x0, x1, x2, x3); return qd_real(x0, x1, x2, x3); } @@ -96,7 +96,7 @@ qd_real floor(const qd_real &a) { if (x0 == a[0]) { x1 = std::floor(a[1]); - + if (x1 == a[1]) { x2 = std::floor(a[2]); @@ -119,7 +119,7 @@ qd_real ceil(const qd_real &a) { if (x0 == a[0]) { x1 = std::ceil(a[1]); - + if (x1 == a[1]) { x2 = std::ceil(a[2]); @@ -195,7 +195,7 @@ istream &operator>>(istream &s, qd_real &qd) { ostream &operator<<(ostream &os, const qd_real &qd) { bool showpos = (os.flags() & ios_base::showpos) != 0; bool uppercase = (os.flags() & ios_base::uppercase) != 0; - return os << qd.to_string(os.precision(), os.width(), os.flags(), + return os << qd.to_string(os.precision(), os.width(), os.flags(), showpos, uppercase, os.fill()); } @@ -248,7 +248,7 @@ int qd_real::read(const char *s, qd_real &qd) { break; default: return -1; - + } } @@ -350,9 +350,9 @@ void qd_real::to_digits(char *s, int &expn, int precision) const { } /* If first digit is 10, shift everything. */ - if (s[0] > '9') { - e++; - for (i = precision; i >= 2; i--) s[i] = s[i-1]; + if (s[0] > '9') { + e++; + for (i = precision; i >= 2; i--) s[i] = s[i-1]; s[0] = '1'; s[1] = '0'; } @@ -363,10 +363,10 @@ void qd_real::to_digits(char *s, int &expn, int precision) const { /* Writes the quad-double number into the character array s of length len. The integer d specifies how many significant digits to write. - The string s must be able to hold at least (d+8) characters. + The string s must be able to hold at least (d+8) characters. showpos indicates whether to use the + sign, and uppercase indicates whether the E or e is to be used for the exponent. */ -void qd_real::write(char *s, int len, int precision, +void qd_real::write(char *s, int len, int precision, bool showpos, bool uppercase) const { string str = to_string(precision, 0, ios_base::scientific, showpos, uppercase); strncpy(s, str.c_str(), len-1); @@ -407,7 +407,7 @@ void round_string_qd(char *s, int precision, int *offset){ } -string qd_real::to_string(int precision, int width, ios_base::fmtflags fmt, +string qd_real::to_string(int precision, int width, ios_base::fmtflags fmt, bool showpos, bool uppercase, char fill) const { string s; bool fixed = (fmt & ios_base::fixed) != 0; @@ -523,7 +523,6 @@ string qd_real::to_string(int precision, int width, ios_base::fmtflags fmt, if( fabs( from_string / this->x[0] ) > 3.0 ){ int point_position; - char temp; // loop on the string, find the point, move it up one // don't act on the first character @@ -740,51 +739,65 @@ qd_real qd_real::accurate_div(const qd_real &a, const qd_real &b) { return qd_real(q0, q1, q2, q3); } -QD_API qd_real sqrt(const qd_real &a) { - /* Strategy: - - Perform the following Newton iteration: +QD_API qd_real fsqrt(const qd_real &a, int &flag) { + /* Uses Heron's method, see: + https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Babylonian_method - x' = x + (1 - a * x^2) * x / 2; - - which converges to 1/sqrt(a), starting with the - double precision approximation to 1/sqrt(a). - Since Newton's iteration more or less doubles the - number of correct digits, we only need to perform it - twice. + 1. x0 = approximate sqrt(a); + 2. x_{n+1} = (1/2) * (x_n + a / x_n); + 3. repeat 2 until corrections are small */ + int i; + double e, eps; + + qd_real r, diff; + qd_real half = "0.5000000000000000000000000000000000" + "000000000000000000000000000000000000"; if (a.is_zero()) - return 0.0; + return (qd_real) 0.0; if (a.is_negative()) { qd_real::error("(qd_real::sqrt): Negative argument."); return qd_real::_nan; } - qd_real r = (1.0 / std::sqrt(a[0])); - qd_real h = mul_pwr2(a, 0.5); + eps = std::numeric_limits::epsilon(); - r += ((0.5 - h * sqr(r)) * r); - r += ((0.5 - h * sqr(r)) * r); - r += ((0.5 - h * sqr(r)) * r); + qd_real x = std::sqrt(a[0]); + qd_real y; - r *= a; - return r; + for (i=0; i < 10; i++) { + y = half * (x + a / x); + diff = x - y; + x = y; + e = fabs(((diff[3] + diff[2]) + diff[1]) + diff[0]); + if (e < fabs(x.x[0]) * eps) { + flag = 0; // convergence achieved + return x; + } + } + + flag = 1; // failed to converge + return x; } +QD_API qd_real sqrt(const qd_real &a) { + int flag; + return fsqrt(a, flag); +} /* Computes the n-th root of a */ qd_real nroot(const qd_real &a, int n) { /* Strategy: Use Newton's iteration to solve - + 1/(x^n) - a = 0 Newton iteration becomes x' = x + x * (1 - a * x^n) / n - Since Newton's iteration converges quadratically, + Since Newton's iteration converges quadratically, we only need to perform it twice. */ @@ -860,13 +873,13 @@ static const qd_real inv_fact[n_inv_fact] = { qd_real exp(const qd_real &a) { /* Strategy: We first reduce the size of x by noting that - + exp(kr + m * log(2)) = 2^m * exp(r)^k where m and k are integers. By choosing m appropriately - we can make |kr| <= log(2) / 2 = 0.347. Then exp(r) is - evaluated using the familiar Taylor series. Reducing the - argument substantially speeds up the convergence. */ + we can make |kr| <= log(2) / 2 = 0.347. Then exp(r) is + evaluated using the familiar Taylor series. Reducing the + argument substantially speeds up the convergence. */ const double k = ldexp(1.0, 16); const double inv_k = 1.0 / k; @@ -929,11 +942,11 @@ qd_real log(const qd_real &a) { using Newton iteration. The iteration is given by - x' = x - f(x)/f'(x) + x' = x - f(x)/f'(x) = x - (1 - a * exp(-x)) = x + a * exp(-x) - 1. - - Two iteration is needed, since Newton's iteration + + Two iteration is needed, since Newton's iteration approximately doubles the number of digits per iteration. */ if (a.is_one()) { @@ -1999,7 +2012,7 @@ static const qd_real cos_table [] = { /* Computes sin(a) and cos(a) using Taylor series. Assumes |a| <= pi/2048. */ -static void sincos_taylor(const qd_real &a, +static void sincos_taylor(const qd_real &a, qd_real &sin_a, qd_real &cos_a) { const double thresh = 0.5 * qd_real::_eps * std::abs(to_double(a)); qd_real p, s, t, x; @@ -2327,7 +2340,7 @@ qd_real atan(const qd_real &a) { } qd_real atan2(const qd_real &y, const qd_real &x) { - /* Strategy: Instead of using Taylor series to compute + /* Strategy: Instead of using Taylor series to compute arctan, we instead use Newton's iteration to solve the equation @@ -2340,12 +2353,12 @@ qd_real atan2(const qd_real &y, const qd_real &x) { z' = z - (x - cos(z)) / sin(z) (for equation 2) Here, x and y are normalized so that x^2 + y^2 = 1. - If |x| > |y|, then first iteration is used since the + If |x| > |y|, then first iteration is used since the denominator is larger. Otherwise, the second is used. */ if (x.is_zero()) { - + if (y.is_zero()) { /* Both x and y is zero. */ qd_real::error("(qd_real::atan2): Both arguments zero."); @@ -2441,7 +2454,7 @@ qd_real acos(const qd_real &a) { return atan2(sqrt(1.0 - sqr(a)), a); } - + qd_real sinh(const qd_real &a) { if (a.is_zero()) { return 0.0; @@ -2542,7 +2555,7 @@ QD_API qd_real qdrand() { qd_real r = 0.0; double d; - /* Strategy: Generate 31 bits at a time, using lrand48 + /* Strategy: Generate 31 bits at a time, using lrand48 random number generator. Shift the bits, and repeat 7 times. */ @@ -2561,7 +2574,7 @@ QD_API qd_real qdrand() { qd_real polyeval(const qd_real *c, int n, const qd_real &x) { /* Just use Horner's method of polynomial evaluation. */ qd_real r = c[n]; - + for (int i = n-1; i >= 0; i--) { r *= x; r += c[i]; @@ -2571,10 +2584,10 @@ qd_real polyeval(const qd_real *c, int n, const qd_real &x) { } /* polyroot(c, n, x0) - Given an n-th degree polynomial, finds a root close to + Given an n-th degree polynomial, finds a root close to the given guess x0. Note that this uses simple Newton iteration scheme, and does not work for multiple roots. */ -QD_API qd_real polyroot(const qd_real *c, int n, +QD_API qd_real polyroot(const qd_real *c, int n, const qd_real &x0, int max_iter, double thresh) { qd_real x = x0; qd_real f; diff --git a/spherical_geometry/tests/test_basic.py b/spherical_geometry/tests/test_basic.py index b9e688c..c7a95a5 100644 --- a/spherical_geometry/tests/test_basic.py +++ b/spherical_geometry/tests/test_basic.py @@ -513,3 +513,17 @@ def test_math_util_angle_domain(): def test_math_util_length_domain(): with pytest.raises(ValueError): math_util.length([[np.nan, 0, 0]], [[0, 0, np.inf]]) + +def test_math_util_angle_nearly_coplanar_vec(): + # test from issue #222 + extra values + vectors = [ + 5 * [[1.0, 1.0, 1.0]], + 5 * [[1, 0.9999999, 1]], + [[1, 0.5, 1], [1, 0.15, 1], [1, 0.001, 1], [1, 0.15, 1], [-1, 0.1, -1]] + ] + angles = math_util.angle(*vectors) + + assert np.allclose(angles[:-1], np.pi, rtol=0, atol=1e-16) + assert np.allclose(angles[-1], 0, rtol=0, atol=1e-32) + + diff --git a/src/math_util.c b/src/math_util.c index 2db95cf..7c14675 100644 --- a/src/math_util.c +++ b/src/math_util.c @@ -8,6 +8,7 @@ #include "qd/c_qd.h" + /* The intersects, length and intersects_point calculations use "double double" representation internally, as supported by libqd. Emperical @@ -63,6 +64,8 @@ typedef struct { double x[4]; } qd; +double QD_ONE[4] = {1.0, 0.0, 0.0, 0.0}; + static NPY_INLINE void load_point(const char *in, const intp s, double* out) { out[0] = (*(double *)in); @@ -144,8 +147,8 @@ normalize_qd(const qd *A, qd *B) { PyErr_SetString(PyExc_ValueError, "Domain error in sqrt"); return 1; } - c_qd_sqrt(T[3], l); + for (i = 0; i < 3; ++i) { c_qd_div(A[i].x, l, B[i].x); } @@ -166,6 +169,48 @@ dot_qd(const qd *A, const qd *B, qd *C) { c_qd_add(tmp[3], tmp[2], C->x); } +/* + normalized_dot_qd returns dot product of normalized input vectors. +*/ +static NPY_INLINE int +normalized_dot_qd(const qd *A, const qd *B, qd *dot_val) { + int i, flag; + qd aa, bb, ab; + double aabb[4]; + double norm[4]; + double *v0 = dot_val->x; + double *v1 = dot_val->x + 1; + double eps = 10.0 * c_qd_epsilon(); + + dot_qd(A, A, &aa); + dot_qd(B, B, &bb); + dot_qd(A, B, &ab); + c_qd_mul(aa.x, bb.x, aabb); + + if (aabb[0] < -0.0) { + PyErr_SetString(PyExc_ValueError, "Domain error in sqrt"); + return 1; + } + + flag = c_qd_sqrt(aabb, norm); + + if (norm[0] == 0.0) { + /* return non-normalized value: */ + PyErr_SetString(PyExc_ValueError, "Null vector."); + c_qd_copy(ab.x, dot_val->x); + return 1; + } else { + c_qd_div(ab.x, norm, dot_val->x); + } + + if ((*v0 == 1.0 && *v1 > 0.0 && *v1 < eps) || + (*v0 == -1.0 && *v1 < 0.0 && *v1 > -eps)) { + c_qd_copy_d(dot_val->x[0], dot_val->x); + } + + return 0; +} + static NPY_INLINE double sign(const double A) { #if defined(_MSC_VER) || defined(__MINGW32__) @@ -662,20 +707,21 @@ char *angle_signature = "(i),(i),(i)->()"; static void DOUBLE_angle(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func)) { + int comp; qd A[3]; qd B[3]; qd C[3]; qd ABX[3]; qd BCX[3]; - qd TMP[3]; qd X[3]; qd diff; qd inner; - qd angle; + double angle[4]; + double abs_inner[4]; - double dangle; + double _2pi[4]; unsigned int old_cw; @@ -692,46 +738,27 @@ DOUBLE_angle(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func)) load_point_qd(ip3, is3, C); cross_qd(A, B, ABX); - - if (normalize_qd(ABX, ABX)) return; - cross_qd(C, B, BCX); - - if (normalize_qd(BCX, BCX)) return; - cross_qd(ABX, BCX, X); - - if (normalize_qd(X, X)) return; - dot_qd(B, X, &diff); - dot_qd(ABX, BCX, &inner); - - /* The following threshold is currently arbitrary and - is based on observed errors in that value and several - orders of magnitude larger than those. One day someone - should determine how qd is producing those errors, - but for now... - */ - if (fabs(inner.x[0]) == 1.0 && fabs(inner.x[1]) < 1e-60) { - inner.x[1] = 0.; - inner.x[2] = 0.; - inner.x[3] = 0.; - } - if (inner.x[0] != inner.x[0] || - inner.x[0] < -1.0 || - inner.x[0] > 1.0) { + if (normalized_dot_qd(ABX, BCX, &inner)) return; + + c_qd_abs(inner.x, abs_inner); + c_qd_comp(abs_inner, QD_ONE, &comp); + if (inner.x[0] != inner.x[0] || comp == 1) { PyErr_SetString(PyExc_ValueError, "Out of domain for acos"); return; } - c_qd_acos(inner.x, angle.x); - dangle = angle.x[0]; + c_qd_acos(inner.x, angle); - if (diff.x[0] < 0.0) { - dangle = 2.0 * NPY_PI - dangle; + c_qd_comp_qd_d(diff.x, 0.0, &comp); + if (comp == -1) { + c_qd_2pi(_2pi); + c_qd_sub(_2pi, angle, angle); } - *((double *)op) = dangle; + *((double *)op) = angle[0]; END_OUTER_LOOP fpu_fix_end(&old_cw);