You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
918 lines
34 KiB
918 lines
34 KiB
#ifndef ALGOIM_BERNSTEIN_HPP
|
|
#define ALGOIM_BERNSTEIN_HPP
|
|
|
|
// algoim::bernstein implements several routines for working with multivariate Bernstein
|
|
// polynomials. Many of these methods, especially the orthant, root finding, Sylvester
|
|
// and Bezout methods, are based on those described in the paper
|
|
// R. I. Saye, High-order quadrature on multi-component domains implicitly defined
|
|
// by multivariate polynomials, Journal of Computational Physics, 448, 110720 (2022),
|
|
// https://doi.org/10.1016/j.jcp.2021.110720
|
|
|
|
#include <cassert>
|
|
#include <cmath>
|
|
#include "real.hpp"
|
|
#include "uvector.hpp"
|
|
#include "xarray.hpp"
|
|
#include "sparkstack.hpp"
|
|
#include "binomial.hpp"
|
|
#include "utility.hpp"
|
|
|
|
// Some methods rely on a LAPACK implementation to solve
|
|
// generalised eigenvalue problems and SVD factorisation
|
|
#if __has_include(<lapacke.h>)
|
|
#include <lapacke.h>
|
|
#elif __has_include(<mkl_lapacke.h>)
|
|
#include <mkl_lapacke.h>
|
|
#else
|
|
#error \
|
|
"Algoim requires a LAPACKE implementation to compute eigenvalues and SVD factorisations, but a suitable lapacke.h include file was not found; did you forget to specify its include path?"
|
|
#endif
|
|
|
|
namespace algoim::bernstein
|
|
{
|
|
// Evaluate at x the P Bernstein basis functions of degree P-1
|
|
// out: array of length P
|
|
template <typename T>
|
|
void evalBernsteinBasis(const T& x, int P, T* out)
|
|
{
|
|
assert(P >= 1);
|
|
const real* binom = Binomial::row(P - 1);
|
|
T p = 1.0;
|
|
for (int i = 0; i < P; ++i) {
|
|
out[i] = p * binom[i];
|
|
p *= x;
|
|
}
|
|
p = 1.0;
|
|
for (int i = P - 1; i >= 0; --i) {
|
|
out[i] *= p;
|
|
p *= 1.0 - x;
|
|
}
|
|
}
|
|
|
|
// Evaluate an N-dimensional Bernstein polynomial at x
|
|
template <int N>
|
|
real evalBernsteinPoly(const xarray<real, N>& beta, const uvector<real, N>& x)
|
|
{
|
|
uvector<real*, N> basis;
|
|
algoim_spark_alloc_vec(real, basis, beta.ext());
|
|
for (int i = 0; i < N; ++i) evalBernsteinBasis(x(i), beta.ext(i), basis(i));
|
|
real r = 0.0;
|
|
for (auto i = beta.loop(); ~i; ++i) {
|
|
real s = beta.l(i);
|
|
for (int dim = 0; dim < N; ++dim) s *= basis(dim)[i(dim)];
|
|
r += s;
|
|
}
|
|
return r;
|
|
}
|
|
|
|
// Fast evaluation of a 1-D Bernstein polynomial and its derivative; it is assumed that
|
|
// binom == Binomial::row(P - 1), left to the caller to evaluate and cache, for speed
|
|
void bernsteinValueAndDerivative(const real* alpha, int P, const real* binom, real x, real& value, real& deriv)
|
|
{
|
|
assert(P > 1);
|
|
real *a, *b;
|
|
algoim_spark_alloc(real, &a, P, &b, P);
|
|
a[0] = 1;
|
|
for (int i = 1; i < P; ++i) a[i] = a[i - 1] * x;
|
|
b[0] = 1;
|
|
for (int i = 1; i < P; ++i) b[i] = b[i - 1] * (1 - x);
|
|
value = alpha[0] * b[P - 1] + alpha[P - 1] * a[P - 1];
|
|
for (int i = 1; i < P - 1; ++i) value += alpha[i] * binom[i] * a[i] * b[P - 1 - i];
|
|
deriv = (alpha[P - 1] * a[P - 2] - alpha[0] * b[P - 2]) * (P - 1);
|
|
for (int i = 1; i < P - 1; ++i)
|
|
deriv += alpha[i] * binom[i] * (a[i - 1] * b[P - 1 - i] * i - a[i] * b[P - 2 - i] * (P - 1 - i));
|
|
}
|
|
|
|
// Evaluate the gradient of an N-dimensional Bernstein polynomial at x
|
|
template <int N>
|
|
uvector<real, N> evalBernsteinPolyGradient(const xarray<real, N>& beta, const uvector<real, N>& x)
|
|
{
|
|
uvector<real*, N> basis, prime;
|
|
algoim_spark_alloc_vec(real, basis, beta.ext());
|
|
algoim_spark_alloc_vec(real, prime, beta.ext());
|
|
for (int i = 0; i < N; ++i) {
|
|
int P = beta.ext(i);
|
|
assert(P >= 1);
|
|
evalBernsteinBasis(x(i), P, basis(i));
|
|
if (P > 1) {
|
|
real* buff;
|
|
algoim_spark_alloc(real, &buff, P - 1);
|
|
evalBernsteinBasis(x(i), P - 1, buff);
|
|
prime(i)[0] = (P - 1) * (-buff[0]);
|
|
prime(i)[P - 1] = (P - 1) * (buff[P - 2]);
|
|
for (int j = 1; j < P - 1; ++j) prime(i)[j] = (P - 1) * (buff[j - 1] - buff[j]);
|
|
} else
|
|
prime(i)[0] = 0.0;
|
|
}
|
|
uvector<real, N> g = real(0.0);
|
|
for (auto i = beta.loop(); ~i; ++i) {
|
|
for (int j = 0; j < N; ++j) {
|
|
real s = beta.l(i);
|
|
for (int dim = 0; dim < N; ++dim)
|
|
if (dim == j)
|
|
s *= prime(dim)[i(dim)];
|
|
else
|
|
s *= basis(dim)[i(dim)];
|
|
g(j) += s;
|
|
}
|
|
}
|
|
return g;
|
|
}
|
|
|
|
// Assuming p is represented in scaled Bernstein coefficients, reverse that scaling
|
|
template <int N>
|
|
void reverseScaledCoeff(xarray<real, N>& p)
|
|
{
|
|
uvector<const real*, N> binom;
|
|
for (int i = 0; i < N; ++i) binom(i) = Binomial::row(p.ext(i) - 1);
|
|
for (auto i = p.loop(); i; ++i) {
|
|
real alpha = 1;
|
|
for (int dim = 0; dim < N; ++dim) alpha *= binom(dim)[i(dim)];
|
|
p.l(i) /= alpha;
|
|
}
|
|
}
|
|
|
|
// Squared L2 norm of a Bernstein polynomial; the result may be negative, but only if
|
|
// the polynomial is essentially machine zero
|
|
template <int N>
|
|
real squaredL2norm(const xarray<real, N>& p)
|
|
{
|
|
uvector<const real*, N> b1, b2;
|
|
for (int dim = 0; dim < N; ++dim) {
|
|
b1(dim) = Binomial::row(p.ext(dim) - 1);
|
|
b2(dim) = Binomial::row(2 * p.ext(dim) - 2);
|
|
}
|
|
real delta = 0;
|
|
for (auto i = p.loop(); ~i; ++i)
|
|
for (auto j = p.loop(); ~j; ++j) {
|
|
real g = 1;
|
|
for (int dim = 0; dim < N; ++dim) g *= (b1(dim)[i(dim)] / b2(dim)[i(dim) + j(dim)]) * b1(dim)[j(dim)];
|
|
delta += p.l(i) * p.l(j) * g;
|
|
}
|
|
for (int dim = 0; dim < N; ++dim) delta /= 2 * p.ext(dim) - 1;
|
|
return delta;
|
|
}
|
|
|
|
// Collapse a multivariate Bernstein polynomial along a given axis-aligned line, i.e., the
|
|
// set of points x where x(dim) is free and x(i) = x0(i) for all i != dim
|
|
// out: array of length beta.ext(dim)
|
|
template <int N>
|
|
void collapseAlongAxis(const xarray<real, N>& beta, const uvector<real, N - 1>& x0, int dim, real* out)
|
|
{
|
|
if constexpr (N == 1) {
|
|
assert(dim == 0);
|
|
for (int i = 0; i < beta.ext(0); ++i) out[i] = beta[i];
|
|
} else {
|
|
assert(0 <= dim && dim < N);
|
|
uvector<real*, N - 1> basis;
|
|
algoim_spark_alloc_vec(real, basis, remove_component(beta.ext(), dim));
|
|
for (int i = 0; i < N - 1; ++i) {
|
|
int P = beta.ext(i < dim ? i : i + 1);
|
|
evalBernsteinBasis(x0(i), P, basis(i));
|
|
}
|
|
int P = beta.ext(dim);
|
|
for (int i = 0; i < P; ++i) out[i] = 0.0;
|
|
for (auto i = beta.loop(); ~i; ++i) {
|
|
real s = beta.l(i);
|
|
for (int j = 0; j < N; ++j)
|
|
if (j < dim)
|
|
s *= basis(j)[i(j)];
|
|
else if (j > dim)
|
|
s *= basis(j - 1)[i(j)];
|
|
out[i(dim)] += s;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Collapse a multivariate Bernstein polynomial along a given axis-orthogonal hyperplane,
|
|
// i.e., the set of points x where x(dim) is a fixed given value
|
|
template <int N>
|
|
void collapseAlongHyperplane(const xarray<real, N>& beta, int dim, real x, xarray<real, N - 1>& out)
|
|
{
|
|
static_assert(N > 1, "N > 1 required");
|
|
assert(all(out.ext() == remove_component(beta.ext(), dim)));
|
|
assert(0 <= dim && dim < N);
|
|
int P = beta.ext(dim);
|
|
real* basis;
|
|
algoim_spark_alloc(real, &basis, P);
|
|
evalBernsteinBasis(x, P, basis);
|
|
out = 0.0;
|
|
for (auto i = beta.loop(); i; ++i) out.m(remove_component(i(), dim)) += beta.l(i) * basis[i(dim)];
|
|
}
|
|
|
|
// Normalise polynomial by its largest (in absolute value) coefficient
|
|
template <int N>
|
|
void normalise(xarray<real, N>& alpha)
|
|
{
|
|
real x = alpha.maxNorm();
|
|
if (x > 0) alpha *= real(1) / x;
|
|
}
|
|
|
|
// Applying a simple examination of coefficient signs, returns +1 if the
|
|
// polynomial is uniformly positive, -1 if the polynomial is uniformly
|
|
// negative, or 0 if no guarantees can be made
|
|
template <int N>
|
|
int uniformSign(const xarray<real, N>& beta)
|
|
{
|
|
int s = util::sign(beta[0]);
|
|
for (int i = 1; i < beta.size(); ++i)
|
|
if (util::sign(beta[i]) != s) return 0;
|
|
return s;
|
|
}
|
|
|
|
// Compute coefficients of derivative in lower degree basis
|
|
// alpha: array of length P
|
|
// out: array of length P - 1
|
|
template <typename T>
|
|
void bernsteinDerivative(const T* alpha, int P, T* out)
|
|
{
|
|
assert(P >= 2);
|
|
for (int i = 0; i < P - 1; ++i) {
|
|
out[i] = alpha[i + 1];
|
|
out[i] -= alpha[i];
|
|
out[i] *= P - 1;
|
|
}
|
|
}
|
|
|
|
// Compute the derivative of a Bernstein polynomial
|
|
template <int N>
|
|
void bernsteinDerivative(const xarray<real, N>& a, int dim, xarray<real, N>& out)
|
|
{
|
|
assert(all(out.ext() == inc_component(a.ext(), dim, -1)));
|
|
int P = a.ext(dim);
|
|
assert(P >= 2);
|
|
// if (P < 2) {
|
|
// int aaa = 1;
|
|
// int bbb = 1;
|
|
// }
|
|
for (auto i = out.loop(); ~i; ++i) out.l(i) = a.m(i.shifted(dim, 1)) - a.m(i());
|
|
out *= P - 1;
|
|
}
|
|
|
|
// Compute the derivative of a Bernstein polynomial, in the original basis;
|
|
// equivalent to computing normal derivative, and then elevating once
|
|
template <int N>
|
|
void elevatedDerivative(const xarray<real, N>& a, int dim, xarray<real, N>& out)
|
|
{
|
|
assert(all(out.ext() == a.ext()) && 0 <= dim && dim < N);
|
|
int P = a.ext(dim);
|
|
for (auto i = a.loop(); ~i; ++i) {
|
|
if (i(dim) == 0)
|
|
out.l(i) = (a.m(i.shifted(dim, 1)) - a.l(i)) * (P - 1);
|
|
else if (i(dim) == P - 1)
|
|
out.l(i) = (a.l(i) - a.m(i.shifted(dim, -1))) * (P - 1);
|
|
else
|
|
out.l(i) =
|
|
a.m(i.shifted(dim, -1)) * (-i(dim)) + a.l(i) * (2 * i(dim) - P + 1) + a.m(i.shifted(dim, 1)) * (P - 1 - i(dim));
|
|
}
|
|
}
|
|
|
|
// Apply de Casteljau algorithm to compute the Bernstein coefficients of alpha relative to the interval [0,tau]
|
|
template <int N>
|
|
void deCasteljauLeft(xarray<real, N>& alpha, real tau)
|
|
{
|
|
int P = alpha.ext(0);
|
|
for (int i = 1; i < P; ++i)
|
|
for (int j = P - 1; j >= i; --j) {
|
|
alpha.a(j) *= tau; // here the ptr to the array is included in the return
|
|
alpha.a(j) += alpha.a(j - 1) * (1.0 - tau);
|
|
}
|
|
}
|
|
|
|
// Apply de Casteljau algorithm to compute the Bernstein coefficients of alpha relative to the interval [tau,1]
|
|
template <int N>
|
|
void deCasteljauRight(xarray<real, N>& alpha, real tau)
|
|
{
|
|
int P = alpha.ext(0);
|
|
for (int i = 1; i < P; ++i)
|
|
for (int j = 0; j < P - i; ++j) {
|
|
alpha.a(j) *= (1.0 - tau);
|
|
alpha.a(j) += alpha.a(j + 1) * tau;
|
|
}
|
|
}
|
|
|
|
// Apply de Casteljau algorithm to compute the Bernstein coefficients of alpha relative
|
|
// to the hyperrectangle [a,b]. It is assumed that the given arrays a & b each have
|
|
// length at least N. If, for a particular dimension, a[dim] > b[dim], both the interval
|
|
// and coefficients are reversed.
|
|
template <int N, bool B = false>
|
|
void deCasteljau(xarray<real, N>& alpha, const real* a, const real* b)
|
|
{
|
|
using std::abs;
|
|
using std::swap;
|
|
if constexpr (N == 1 || B) {
|
|
int P = alpha.ext(0);
|
|
if (*b < *a) {
|
|
deCasteljau<N, B>(alpha, b, a);
|
|
for (int i = 0; i < P / 2; ++i) swap(alpha.a(i), alpha.a(P - 1 - i));
|
|
return;
|
|
}
|
|
if (abs(*b) >= abs(*a - 1)) {
|
|
deCasteljauLeft(alpha, *b);
|
|
deCasteljauRight(alpha, *a / *b);
|
|
} else {
|
|
deCasteljauRight(alpha, *a);
|
|
deCasteljauLeft(alpha, (*b - *a) / (real(1) - *a));
|
|
}
|
|
} else {
|
|
deCasteljau<2, true>(alpha.flatten().ref(), a, b);
|
|
for (int i = 0; i < alpha.ext(0); ++i) deCasteljau(alpha.slice(i).ref(), a + 1, b + 1);
|
|
}
|
|
}
|
|
|
|
// Apply de Casteljau algorithm to compute the Bernstein coefficients of alpha relative
|
|
// to the hyperrectangle [a,b]. If, for a particular dimension, a[dim] > b[dim], both the
|
|
// interval and coefficients are reversed.
|
|
template <int N>
|
|
void deCasteljau(const xarray<real, N>& alpha, const uvector<real, N>& a, const uvector<real, N>& b, xarray<real, N>& out)
|
|
{
|
|
assert(all(out.ext() == alpha.ext()));
|
|
out = alpha; // 这里是一个深拷贝,alpha不会被修改
|
|
deCasteljau(out, a.data(), b.data());
|
|
}
|
|
|
|
// Elevate the degree of a Bernstein polynomial
|
|
template <int N, bool B = false>
|
|
void bernsteinElevate(const xarray<real, N>& alpha, xarray<real, N>& beta)
|
|
{
|
|
assert(all(beta.ext() >= alpha.ext()));
|
|
if constexpr (N == 1 || B) {
|
|
int P = alpha.ext(0), Q = beta.ext(0);
|
|
if (P == Q) {
|
|
for (int k = 0; k < P; ++k) beta.a(k) = alpha.a(k);
|
|
} else {
|
|
int n = P - 1;
|
|
int r = Q - 1 - n;
|
|
if (r == 1) {
|
|
beta.a(0) = alpha.a(0);
|
|
beta.a(n + 1) = alpha.a(n);
|
|
for (int k = 1; k <= n; ++k) {
|
|
beta.a(k) = alpha.a(k - 1) * (real(k) / real(n + 1));
|
|
beta.a(k) += alpha.a(k) * (real(1) - real(k) / real(n + 1));
|
|
}
|
|
return;
|
|
}
|
|
const real* bn = Binomial::row(n);
|
|
const real* br = Binomial::row(r);
|
|
const real* bnr = Binomial::row(n + r);
|
|
for (int k = 0; k <= n + r; ++k) {
|
|
beta.a(k) = 0.0;
|
|
for (int j = std::max(0, k - r); j <= std::min(n, k); ++j)
|
|
beta.a(k) += alpha.a(j) * ((br[k - j] * bn[j]) / bnr[k]);
|
|
}
|
|
}
|
|
} else {
|
|
xarray<real, N> gamma(nullptr, set_component(alpha.ext(), 0, beta.ext(0)));
|
|
algoim_spark_alloc(real, gamma);
|
|
bernsteinElevate<2, true>(alpha.flatten(), gamma.flatten().ref());
|
|
for (int i = 0; i < beta.ext(0); ++i) bernsteinElevate(gamma.slice(i), beta.slice(i).ref());
|
|
}
|
|
}
|
|
|
|
namespace detail
|
|
{
|
|
// Compute least squares solution of Ax=b, where A is a (P+1) x P lower bidiagonal matrix, with
|
|
// diagonal given by 'alpha', lower diagonal by 'beta', each of length P, and the rhs is given
|
|
// by 'b', of length P+1; the algorithm applies QR with Givens, which shall create an upper
|
|
// bidiagonal R; no pivoting is performed, and the complexity is O(P) for QR factorisation and
|
|
// O(P*O) for back-solve on rhs of size PxO
|
|
// in: alpha, length P, shall be overwritten
|
|
// in: beta, length P, shall be overwritten
|
|
// in: b, overwritten with first P rows yielding least squares solution
|
|
void lsqr_bidiagonal(real* alpha, real* beta, int P, xarray<real, 2>& b)
|
|
{
|
|
assert(b.ext(0) == P + 1 && b.ext(1) > 0);
|
|
real* gamma;
|
|
algoim_spark_alloc_def(real, 0, &gamma, P);
|
|
for (int i = 0; i < P; ++i) {
|
|
real c, s;
|
|
util::givens_get(alpha[i], beta[i], c, s);
|
|
util::givens_rotate(alpha[i], beta[i], c, s);
|
|
if (i < P - 1) util::givens_rotate(gamma[i + 1], alpha[i + 1], c, s);
|
|
for (int k = 0; k < b.ext(1); ++k) util::givens_rotate(b(i, k), b(i + 1, k), c, s);
|
|
}
|
|
b.a(P - 1) /= alpha[P - 1];
|
|
for (int i = P - 2; i >= 0; --i) {
|
|
b.a(i) -= b.a(i + 1) * gamma[i + 1];
|
|
b.a(i) /= alpha[i];
|
|
}
|
|
}
|
|
} // namespace detail
|
|
|
|
// Reduce by one the effective degree of a Bernstein polynomial; this routine
|
|
// mainly makes sense when the actual polynomial degree is less than the one
|
|
// used in its (starting) Bernstein polynomial representation
|
|
template <int N, bool B = false>
|
|
void bernsteinReduction(xarray<real, N>& alpha, int dim)
|
|
{
|
|
assert(all(alpha.ext() >= 1) && 0 <= dim && dim < N && alpha.ext(dim) >= 2);
|
|
if (dim == 0) {
|
|
int P = alpha.ext(0) - 1;
|
|
real *a, *b;
|
|
algoim_spark_alloc(real, &a, P, &b, P);
|
|
a[0] = 1;
|
|
b[P - 1] = 1;
|
|
for (int k = 1; k < P; ++k) {
|
|
a[k] = real(1) - real(k) / real(P);
|
|
b[k - 1] = real(k) / real(P);
|
|
}
|
|
xarray<real, 2> view(alpha.data(), uvector<int, 2>{P + 1, prod(alpha.ext(), 0)});
|
|
detail::lsqr_bidiagonal(a, b, P, view);
|
|
} else if constexpr (N > 1) {
|
|
for (int i = 0; i < alpha.ext(0); ++i) bernsteinReduction<N - 1, true>(alpha.slice(i).ref(), dim - 1);
|
|
}
|
|
|
|
if (!B) {
|
|
xarray<real, N> beta(nullptr, alpha.ext());
|
|
algoim_spark_alloc(real, beta);
|
|
beta = alpha;
|
|
alpha.alterExtent(inc_component(alpha.ext(), dim, -1));
|
|
for (auto i = alpha.loop(); ~i; ++i) alpha.l(i) = beta.m(i());
|
|
}
|
|
}
|
|
|
|
// Automatically reduce the degree of alpha; returns true iff degree reduction occurred
|
|
template <int N>
|
|
bool autoReduction(xarray<real, N>& alpha, real tol = 1.0e3 * std::numeric_limits<real>::epsilon(), int dim = 0)
|
|
{
|
|
using std::abs;
|
|
using std::sqrt;
|
|
if (dim < 0 || dim >= N || tol <= 0) return false;
|
|
bool stay = false;
|
|
if (alpha.ext(dim) >= 2) {
|
|
xarray<real, N> beta(nullptr, alpha.ext()), gamma(nullptr, alpha.ext());
|
|
algoim_spark_alloc(real, beta, gamma);
|
|
beta = alpha;
|
|
bernsteinReduction(beta, dim);
|
|
bernsteinElevate(beta, gamma);
|
|
gamma -= alpha;
|
|
real delta = sqrt(abs(squaredL2norm(gamma)));
|
|
real norm = sqrt(abs(squaredL2norm(alpha)));
|
|
if (delta < tol * norm) {
|
|
alpha.alterExtent(beta.ext());
|
|
alpha = beta;
|
|
stay = true;
|
|
}
|
|
}
|
|
if (stay) {
|
|
autoReduction<N>(alpha, tol, dim);
|
|
return true;
|
|
} else
|
|
return autoReduction<N>(alpha, tol, dim + 1);
|
|
}
|
|
|
|
// Determine if there is a scalar alpha such that sign x(i) + alpha y(i) > 0 for every component i;
|
|
// if sign = 0, then returns true if it holds for sign = 1 and/or sign = -1
|
|
template <int N>
|
|
bool orthantTestBase(const xarray<real, N>& x, const xarray<real, N>& y, int sign = 0)
|
|
{
|
|
assert(sign == 0 || sign == -1 || sign == 1);
|
|
assert(all(x.ext() == y.ext()));
|
|
using std::abs;
|
|
using std::isinf;
|
|
using std::max;
|
|
using std::min;
|
|
if (sign == 0) return orthantTestBase(x, y, -1) || orthantTestBase(x, y, 1);
|
|
real alphaMax = std::numeric_limits<real>::infinity();
|
|
real alphaMin = -std::numeric_limits<real>::infinity();
|
|
for (int i = 0; i < x.size(); ++i) {
|
|
if (y[i] == 0.0 && x[i] * sign <= 0.0) return false;
|
|
if (y[i] > 0.0)
|
|
alphaMin = max(alphaMin, -x[i] / y[i] * sign);
|
|
else if (y[i] < 0.0)
|
|
alphaMax = min(alphaMax, -x[i] / y[i] * sign);
|
|
}
|
|
if (isinf(alphaMin) || isinf(alphaMax)) return true;
|
|
if (alphaMax - alphaMin > 1.0e5 * std::numeric_limits<real>::epsilon() * max(abs(alphaMin), abs(alphaMax))) return true;
|
|
return false;
|
|
}
|
|
|
|
// Determine if there are scalars alpha and beta such that {alpha f + beta g > 0} holds for every
|
|
// Bernstein coefficient of f and g: if one of the polynomials has a smaller degree than the other,
|
|
// it is degree elevated so that the two polynomials have the same degree
|
|
template <int N>
|
|
bool orthantTest(const xarray<real, N>& f, const xarray<real, N>& g)
|
|
{
|
|
if (all(f.ext() == g.ext()))
|
|
return orthantTestBase(f, g);
|
|
else {
|
|
uvector<int, N> ext = max(f.ext(), g.ext());
|
|
xarray<real, N> fe(nullptr, ext), ge(nullptr, ext);
|
|
algoim_spark_alloc(real, fe, ge);
|
|
bernsteinElevate(f, fe);
|
|
bernsteinElevate(g, ge);
|
|
return orthantTestBase(fe, ge);
|
|
}
|
|
}
|
|
|
|
// Modified Chebyshev nodes (which include endpoints) for interpolating degree P-1 polynomials
|
|
inline real modifiedChebyshevNode(int i, int P)
|
|
{
|
|
assert(0 <= i && i < P);
|
|
using std::cos;
|
|
if (P == 1) {
|
|
int aaa = 1;
|
|
int bbb = 1;
|
|
}
|
|
return 0.5 - 0.5 * cos(util::pi * i / (P - 1));
|
|
}
|
|
|
|
// Methods to compute, and cache, the SVD for Bernstein interpolation based on modified Chebysev nodes
|
|
struct BernsteinVandermondeSVD {
|
|
struct SVD // 将矩阵A分解为U * diag(sigma) * Vt,其中U和Vt是正交矩阵,sigma是对角矩阵
|
|
{
|
|
real* U;
|
|
real* Vt;
|
|
real* sigma;
|
|
};
|
|
|
|
static SVD get(int P)
|
|
{
|
|
assert(P >= 1);
|
|
static thread_local std::unordered_map<int, std::vector<real>> cache;
|
|
if (cache.count(P) == 1) {
|
|
real* base = cache.at(P).data();
|
|
return SVD{base, base + P * P, base + 2 * P * P};
|
|
}
|
|
|
|
real *A, *superb, *basis;
|
|
algoim_spark_alloc(real, &A, P * P, &superb, P, &basis, P);
|
|
for (int i = 0; i < P; ++i) {
|
|
evalBernsteinBasis(modifiedChebyshevNode(i, P), P, basis);
|
|
for (int j = 0; j < P; ++j) A[i * P + j] = basis[j];
|
|
}
|
|
|
|
cache[P].resize(P * P + P * P + P);
|
|
real* base = cache[P].data();
|
|
SVD result{base, base + P * P, base + 2 * P * P};
|
|
|
|
static_assert(
|
|
std::is_same_v<real, double>,
|
|
"Algoim's default LAPACK code assumes real == double; a custom SVD solver is required when real != double");
|
|
int info = LAPACKE_dgesvd(LAPACK_ROW_MAJOR, 'A', 'A', P, P, A, P, result.sigma, result.U, P, result.Vt, P, superb);
|
|
if (info != 0) {
|
|
std::cerr << "LAPACKE_dgesvd call failed (algoim::bernstein::BernsteinVandermondeSVD::get), info = " << info
|
|
<< std::endl;
|
|
}
|
|
assert(info == 0 && "LAPACKE_dgesvd call failed (algoim::bernstein::BernsteinVandermondeSVD::get)");
|
|
return result;
|
|
}
|
|
};
|
|
|
|
// Interpolate tensor-product data f, assumed to be nodal values at the same nodes returned by modifiedChebyshevNode()
|
|
template <int N, bool B = false>
|
|
void bernsteinInterpolate(const xarray<real, N>& f, real tol, xarray<real, N>& out)
|
|
{
|
|
assert(all(out.ext() == f.ext()));
|
|
if constexpr (N == 1 || B) {
|
|
int P = f.ext(0);
|
|
int O = prod(f.ext(), 0);
|
|
assert(P >= 1 && O >= 1);
|
|
|
|
real* tmp;
|
|
algoim_spark_alloc(real, &tmp, P * O);
|
|
|
|
auto svd = BernsteinVandermondeSVD::get(P);
|
|
|
|
for (int i = 0; i < P * O; ++i) tmp[i] = 0.0;
|
|
for (int i = 0; i < P; ++i)
|
|
for (int j = 0; j < P; ++j)
|
|
for (int k = 0; k < O; ++k) tmp[i * O + k] += svd.U[j * P + i] * f[j * O + k];
|
|
|
|
real minsigma = tol * svd.sigma[0];
|
|
for (int i = 0; i < P; ++i) {
|
|
real alpha = (svd.sigma[i] >= minsigma) ? (real(1) / svd.sigma[i]) : 0.0;
|
|
for (int k = 0; k < O; ++k) tmp[i * O + k] *= alpha;
|
|
}
|
|
|
|
out = 0;
|
|
for (int i = 0; i < P; ++i)
|
|
for (int j = 0; j < P; ++j)
|
|
for (int k = 0; k < O; ++k) out[i * O + k] += svd.Vt[j * P + i] * tmp[j * O + k];
|
|
} else {
|
|
xarray<real, N> gamma(nullptr, f.ext());
|
|
algoim_spark_alloc(real, gamma);
|
|
bernsteinInterpolate<2, true>(f.flatten(), tol, gamma.flatten().ref());
|
|
for (int i = 0; i < f.ext(0); ++i) bernsteinInterpolate(gamma.slice(i), tol, out.slice(i).ref());
|
|
}
|
|
}
|
|
|
|
// Interpolate a functional through its nodal evaluation at the modifiedChebyshevNode() points
|
|
template <int N, typename F>
|
|
void bernsteinInterpolate(F&& f, xarray<real, N>& out)
|
|
{
|
|
xarray<real, N> ff(nullptr, out.ext());
|
|
algoim_spark_alloc(real, ff);
|
|
for (auto i = ff.loop(); ~i; ++i) {
|
|
uvector<real, N> x;
|
|
for (int dim = 0; dim < N; ++dim) x(dim) = modifiedChebyshevNode(i(dim), out.ext(dim));
|
|
ff.l(i) = f(x);
|
|
}
|
|
bernsteinInterpolate(ff, std::pow(100.0 * std::numeric_limits<real>::epsilon(), 1.0 / N), out);
|
|
}
|
|
|
|
namespace detail
|
|
{
|
|
// Compute the generalised eigenvalues for matrix pair A, B
|
|
// in: N by N square matrices; A, B will be overwritten
|
|
// out: array of length N x 2
|
|
void generalisedEigenvalues(xarray<real, 2>& A, xarray<real, 2>& B, xarray<real, 2>& out)
|
|
{
|
|
int N = A.ext(0);
|
|
assert(all(A.ext() == N) && all(B.ext() == N) && out.ext(0) == N && out.ext(1) == 2);
|
|
real *alphar, *alphai, *beta, *lscale, *rscale;
|
|
algoim_spark_alloc(real, &alphar, N, &alphai, N, &beta, N, &lscale, N, &rscale, N);
|
|
real abnrm, bbnrm;
|
|
int ilo, ihi;
|
|
static_assert(std::is_same_v<real, double>,
|
|
"Algoim's default LAPACK code assumes real == double; a custom generalised eigenvalue solver is required "
|
|
"when real != double");
|
|
int info = LAPACKE_dggevx(LAPACK_ROW_MAJOR, 'B', 'N', 'N', 'N', N, A.data(), N, B.data(), N, alphar, alphai, beta, nullptr,
|
|
N, nullptr, N, &ilo, &ihi, lscale, rscale, &abnrm, &bbnrm, nullptr, nullptr);
|
|
assert(info == 0 && "LAPACKE_dggevx call failed (algoim::bernstein::detail::generalisedEigenvalues)");
|
|
for (int i = 0; i < N; ++i) {
|
|
if (beta[i] != 0.0)
|
|
out(i, 0) = alphar[i] / beta[i], out(i, 1) = alphai[i] / beta[i];
|
|
else
|
|
out(i, 0) = out(i, 1) = std::numeric_limits<real>::infinity();
|
|
}
|
|
}
|
|
} // namespace detail
|
|
|
|
// Compute all complex roots of a Bernstein polynomial
|
|
// alpha: array of length P
|
|
// out: array of length (P-1) x 2
|
|
void rootsBernsteinPoly(const real* alpha, int P, xarray<real, 2>& out)
|
|
{
|
|
assert(P >= 2 && out.ext(0) == P - 1 && out.ext(1) == 2);
|
|
using std::abs;
|
|
using std::max;
|
|
|
|
real* beta;
|
|
algoim_spark_alloc(real, &beta, P);
|
|
real tol = 0.0;
|
|
for (int i = 0; i < P; ++i) tol = max(tol, abs(alpha[i]));
|
|
tol *= util::sqr(std::numeric_limits<real>::epsilon());
|
|
for (int i = 0; i < P; ++i) beta[i] = (abs(alpha[i]) > tol) ? alpha[i] : 0;
|
|
|
|
int N = P - 1;
|
|
xarray<real, 2> A(nullptr, uvector<int, 2>{N, N});
|
|
xarray<real, 2> B(nullptr, uvector<int, 2>{N, N});
|
|
algoim_spark_alloc(real, A, B);
|
|
A = 0;
|
|
B = 0;
|
|
for (int i = 0; i < N - 1; ++i) A(i, i + 1) = B(i, i + 1) = 1.0;
|
|
for (int i = 0; i < N; ++i) A(N - 1, i) = B(N - 1, i) = -beta[i];
|
|
B(N - 1, N - 1) += beta[N] / N;
|
|
for (int i = 0; i < N - 1; ++i) B(i, i) = real(N - i) / real(i + 1);
|
|
|
|
detail::generalisedEigenvalues(A, B, out);
|
|
}
|
|
|
|
namespace detail
|
|
{
|
|
// Newton's method safeguarded by a standard bisection method; in Bernstein application it
|
|
// is only be applied to a Bernstein polynomial guaranteed to have just one real root
|
|
template <typename F>
|
|
bool newtonBisectionSearch(const F& f, real x0, real x1, real tol, int maxsteps, real& root)
|
|
{
|
|
using std::abs;
|
|
real f0, f1, dummy;
|
|
f(x0, f0, dummy);
|
|
f(x1, f1, dummy);
|
|
if ((f0 > 0.0 && f1 > 0.0) || (f0 < 0.0 && f1 < 0.0)) return false;
|
|
if (f0 == real(0.0)) {
|
|
root = x0;
|
|
return true;
|
|
}
|
|
if (f1 == real(0.0)) {
|
|
root = x1;
|
|
return true;
|
|
}
|
|
|
|
// x0 and x1 define the bracket; x0 always corresponds to negative value of f; x1 positive value of f
|
|
if (f1 < 0.0) std::swap(x0, x1);
|
|
|
|
// Initial guess is midpoint
|
|
real x = (x0 + x1) * 0.5;
|
|
real fx, fpx;
|
|
f(x, fx, fpx);
|
|
real dx = x1 - x0;
|
|
for (int step = 0; step < maxsteps; ++step) {
|
|
if ((fpx * (x - x0) - fx) * (fpx * (x - x1) - fx) < 0.0 && abs(fx) < abs(dx * fpx) * 0.5) {
|
|
// Step in Newton's method falls within bracket and is less than half the previous step size
|
|
dx = -fx / fpx;
|
|
real xold = x;
|
|
x += dx;
|
|
if (xold == x) {
|
|
root = x;
|
|
return true;
|
|
}
|
|
} else {
|
|
// Revert to bisection
|
|
dx = (x1 - x0) * 0.5;
|
|
x = x0 + dx;
|
|
if (x == x0) {
|
|
root = x;
|
|
return true;
|
|
}
|
|
}
|
|
if (abs(dx) < tol) {
|
|
root = x;
|
|
return true;
|
|
}
|
|
f(x, fx, fpx);
|
|
if (fx == real(0.0)) // Got very lucky
|
|
{
|
|
root = x;
|
|
return true;
|
|
}
|
|
if (fx < 0.0)
|
|
x0 = x;
|
|
else
|
|
x1 = x;
|
|
}
|
|
return false;
|
|
}
|
|
} // namespace detail
|
|
|
|
// Compute, if possible, a simple real root in [0,1] of a Bernstein polynomial using
|
|
// Descartes' rule of signs:
|
|
// - if it can be guaranteed that there is exactly 0 roots, 0 is returned
|
|
// - if it can be guaranteed that there is exactly 1 root, and that root has
|
|
// been calculated to full precision using Newton's method, 1 is returned
|
|
// - if some coefficients are close to zero (thereby preventing a reliable use of
|
|
// Descartes' rule), -1 is returned
|
|
// - if no other guarantees can be made, -1 is returned
|
|
int bernsteinSimpleRoot(const real* alpha, int P, real tol, real& root)
|
|
{
|
|
assert(P >= 2);
|
|
using std::abs;
|
|
for (int i = 0; i < P; ++i)
|
|
if (abs(alpha[i]) < tol) return -1;
|
|
int count = 0;
|
|
for (int i = 1; i < P; ++i)
|
|
if (alpha[i - 1] < 0 && alpha[i] >= 0 || alpha[i - 1] >= 0 && alpha[i] < 0) ++count;
|
|
if (count == 0) return 0;
|
|
if (count > 1) return -1;
|
|
real newton_tol = 10.0 * std::numeric_limits<real>::epsilon();
|
|
const real* binom = Binomial::row(P - 1);
|
|
bool b = detail::newtonBisectionSearch(
|
|
[=](real x, real& value, real& prime) { bernsteinValueAndDerivative(alpha, P, binom, x, value, prime); }, 0, 1,
|
|
newton_tol, 12, root);
|
|
return b ? 1 : -1;
|
|
}
|
|
|
|
// Compute real roots of a Bernstein polynomial using a bisection + Newton's method approach.
|
|
// Returns the number of real roots computed (and recorded in out, a buffer of size at least
|
|
// P - 1), or -1 if failed
|
|
int rootsBernsteinPolyFast(const xarray<real, 1>& alpha, real a, real b, int depth, real tol, real* out)
|
|
{
|
|
// Try simple root method
|
|
real root;
|
|
int res = bernsteinSimpleRoot(alpha.data(), alpha.ext(0), tol, root);
|
|
// If it worked with a guarantee of no roots, return
|
|
if (res == 0) return 0;
|
|
// If it worked with a guarantee of just one root computed accurately,
|
|
// transform that root to the [a,b] interval, record it, and return
|
|
if (res == 1) {
|
|
*out = a + (b - a) * root;
|
|
return 1;
|
|
}
|
|
// Otherwise, the simple root method failed. Apply bisection, provided not already too deep
|
|
if (depth >= 4) return -1;
|
|
xarray<real, 1> beta(nullptr, alpha.ext());
|
|
algoim_spark_alloc(real, beta);
|
|
// Apply to left half
|
|
beta = alpha;
|
|
deCasteljauLeft(beta, 0.5);
|
|
int r1 = rootsBernsteinPolyFast(beta, a, a + (b - a) * 0.5, depth + 1, tol, out);
|
|
if (r1 < 0) return -1;
|
|
// Apply to right half, shifting buffer by r1
|
|
beta = alpha;
|
|
deCasteljauRight(beta, 0.5);
|
|
int r2 = rootsBernsteinPolyFast(beta, a + (b - a) * 0.5, b, depth + 1, tol, out + r1);
|
|
if (r2 < 0) return -1;
|
|
return r1 + r2;
|
|
}
|
|
|
|
// Apply generalised eigenvalue method to compute the real roots of alpha in the interval [0,1],
|
|
// returning the number of roots recorded in 'out', a buffer of size at least P - 1
|
|
int bernsteinUnitIntervalRealRoots_eigenvalue(const real* alpha, int P, real* out)
|
|
{
|
|
using std::abs;
|
|
xarray<real, 2> roots(nullptr, uvector<int, 2>{P - 1, 2});
|
|
algoim_spark_alloc(real, roots);
|
|
rootsBernsteinPoly(alpha, P, roots);
|
|
real tol = 1.0e4 * std::numeric_limits<real>::epsilon(); // nearly-real-root tolerance
|
|
int count = 0;
|
|
for (int j = 0; j < P - 1; ++j) {
|
|
if (0 <= roots(j, 0) && roots(j, 0) <= 1 && abs(roots(j, 1)) < tol) {
|
|
*(out + count) = roots(j, 0);
|
|
++count;
|
|
}
|
|
}
|
|
return count;
|
|
}
|
|
|
|
// Apply a Newton's method-based approach to compute the real roots of alpha in the interval [0,1];
|
|
// if succeeded, returns the number of roots recorded in 'out' (a buffer of size at least P -1);
|
|
// if failed, returns -1
|
|
int bernsteinUnitIntervalRealRoots_fast(const real* alpha, int P, real* out)
|
|
{
|
|
using std::abs;
|
|
using std::max;
|
|
// Compute a tolerance by which to declare a nearly-zero coefficient as being
|
|
// too close to zero (for Descartes' rule of signs and to avoid problems where
|
|
// a root lies close to a subinterval endpoint which can confuse bisection)
|
|
real tol = 0;
|
|
for (int i = 0; i < P; ++i) tol = max(tol, abs(alpha[i]));
|
|
tol *= 1.0e4 * std::numeric_limits<real>::epsilon(); // nearly zero coeff tolerance, can be loose
|
|
return rootsBernsteinPolyFast(xarray<real, 1>(const_cast<real*>(alpha), P), 0, 1, 0, tol, out);
|
|
}
|
|
|
|
// Driver method to compute the real roots of a Bernstein polynomial in the interval [0,1];
|
|
// the method first tries a fast approach, which succeeds in the vast majority of cases and is
|
|
// anywhere between 10x and 100x faster than the backup approach; if the fast approach fails,
|
|
// the backup method is applied. Returns the number of computed roots, recorded in the buffer
|
|
// 'out' of size at least P - 1
|
|
int bernsteinUnitIntervalRealRoots(const real* alpha, int P, real* out)
|
|
{
|
|
using std::sqrt;
|
|
if (P == 1) return 0;
|
|
|
|
// Direct method for linear polynomials
|
|
if (P == 2) {
|
|
if (alpha[0] == alpha[1]) return 0;
|
|
real x = alpha[0] / (alpha[0] - alpha[1]);
|
|
if (x < 0 || x > 1) return 0;
|
|
if (std::isnan(x)) {
|
|
int aaa = 1;
|
|
int bbb = 1;
|
|
}
|
|
*out = x;
|
|
return 1;
|
|
}
|
|
|
|
// Direct method for quadratic polynomials, using numerically-stable quadratic formula
|
|
if (P == 3) {
|
|
real a = alpha[0] - alpha[1] * 2 + alpha[2];
|
|
real b = (alpha[1] - alpha[0]) * 2;
|
|
real c = alpha[0];
|
|
real delta = b * b - a * c * 4;
|
|
if (delta < 0) return 0;
|
|
real q = -0.5 * (b + (b >= 0 ? sqrt(delta) : -sqrt(delta)));
|
|
real r1 = q / a;
|
|
real r2 = c / q;
|
|
int count = 0;
|
|
if (0 <= r1 && r1 <= 1) {
|
|
*out = r1;
|
|
++count;
|
|
}
|
|
if (0 <= r2 && r2 <= 1) {
|
|
*(out + count) = r2;
|
|
++count;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
// Apply fast method, if possible, and resort to eigenvalue method if it fails
|
|
int count = bernsteinUnitIntervalRealRoots_fast(alpha, P, out);
|
|
if (count >= 0) return count;
|
|
return bernsteinUnitIntervalRealRoots_eigenvalue(alpha, P, out);
|
|
}
|
|
|
|
// Build Sylvester matrix for Bernstein polynomials of degrees P-1 and Q-1
|
|
// out: square matrix of dimensions P + Q - 2
|
|
void sylvesterMatrix(const real* a, int P, const real* b, int Q, xarray<real, 2>& out)
|
|
{
|
|
assert(P >= 1 && Q >= 1 && P + Q >= 3 && out.ext(0) == P + Q - 2 && out.ext(1) == P + Q - 2);
|
|
const real* bP = Binomial::row(P - 1);
|
|
const real* bQ = Binomial::row(Q - 1);
|
|
const real* bPQ = Binomial::row(P + Q - 3);
|
|
out = 0;
|
|
for (int i = 0; i < Q - 1; ++i)
|
|
for (int j = 0; j < P; ++j) out(i, j + i) = a[j] * (bP[j] / bPQ[j + i]);
|
|
for (int i = 0; i < P - 1; ++i)
|
|
for (int j = 0; j < Q; ++j) out(i + Q - 1, j + i) = b[j] * (bQ[j] / bPQ[j + i]);
|
|
}
|
|
|
|
// Build Bezout matrix for Bernstein polynomials of equal degree P-1
|
|
// out: square matrix of dimensions P - 1
|
|
void bezoutMatrix(const real* a, const real* b, int P, xarray<real, 2>& out)
|
|
{
|
|
assert(P >= 2 && out.ext(0) == P - 1 && out.ext(1) == P - 1);
|
|
const int n = P - 1;
|
|
out = 0;
|
|
for (int i = 1; i <= n; ++i) out(i - 1, 0) = (a[i] * b[0] - a[0] * b[i]) * real(n) / real(i);
|
|
for (int j = 1; j <= n - 1; ++j) out(n - 1, j) = (a[n] * b[j] - a[j] * b[n]) * real(n) / real(n - j);
|
|
for (int i = n - 1; i >= 1; --i)
|
|
for (int j = 1; j <= i - 1; ++j)
|
|
out(i - 1, j) = (a[i] * b[j] - a[j] * b[i]) * real(n * n) / real(i * (n - j))
|
|
+ out(i, j - 1) * real(j * (n - i)) / real(i * (n - j));
|
|
for (int i = 0; i < n; ++i)
|
|
for (int j = i + 1; j < n; ++j) out(i, j) = out(j, i);
|
|
}
|
|
} // namespace algoim::bernstein
|
|
|
|
#endif
|
|
|