brep2sdf/code/pre_processing/ParametricSample/Mathematics/SingularValueDecomposition.h


								// David Eberly, Geometric Tools, Redmond WA 98052

								// Copyright (c) 1998-2021

								// Distributed under the Boost Software License, Version 1.0.

								// https://www.boost.org/LICENSE_1_0.txt

								// https://www.geometrictools.com/License/Boost/LICENSE_1_0.txt

								// Version: 4.0.2019.08.13


								#pragma once


								#include <Mathematics/Math.h>

								#include <Mathematics/RangeIteration.h>

								#include <algorithm>

								#include <cstring>

								#include <vector>


								// The SingularValueDecomposition class is an implementation of Algorithm

								// 8.3.2 (The SVD Algorithm) described in "Matrix Computations, 2nd

								// edition" by G. H. Golub and Charles F. Van Loan, The Johns Hopkins

								// Press, Baltimore MD, Fourth Printing 1993.  Algorithm 5.4.2 (Householder

								// Bidiagonalization) is used to reduce A to bidiagonal B.  Algorithm 8.3.1

								// (Golub-Kahan SVD Step) is used for the iterative reduction from bidiagonal

								// to diagonal.  If A is the original matrix, S is the matrix whose diagonal

								// entries are the singular values, and U and V are corresponding matrices,

								// then theoretically U^T*A*V = S.  Numerically, we have errors

								// E = U^T*A*V - S.  Algorithm 8.3.2 mentions that one expects |E| is

								// approximately u*|A|, where |M| denotes the Frobenius norm of M and where

								// u is the unit roundoff for the floating-point arithmetic: 2^{-23} for

								// 'float', which is FLT_EPSILON = 1.192092896e-7f, and 2^{-52} for'double',

								// which is DBL_EPSILON = 2.2204460492503131e-16.

								//

								// The condition |a(i,i+1)| <= epsilon*(|a(i,i) + a(i+1,i+1)|) used to

								// determine when the reduction decouples to smaller problems is implemented

								// as:  sum = |a(i,i)| + |a(i+1,i+1)|; sum + |a(i,i+1)| == sum.  The idea is

								// that the superdiagonal term is small relative to its diagonal neighbors,

								// and so it is effectively zero.  The unit tests have shown that this

								// interpretation of decoupling is effective.

								//

								// The condition |a(i,i)| <= epsilon*|B| used to determine when the

								// reduction decouples (with a zero singular value) is implemented using

								// the Frobenius norm of B and epsilon = multiplier*u, where for now the

								// multiplier is hard-coded in Solve(...) as 8.

								//

								// The authors suggest that once you have the bidiagonal matrix, a practical

								// implementation will store the diagonal and superdiagonal entries in linear

								// arrays, ignoring the theoretically zero values not in the 2-band.  This is

								// good for cache coherence, and we have used the suggestion.  The essential

								// parts of the Householder u-vectors are stored in the lower-triangular

								// portion of the matrix and the essential parts of the Householder v-vectors

								// are stored in the upper-triangular portion of the matrix.  To avoid having

								// to recompute 2/Dot(u,u) and 2/Dot(v,v) when constructing orthogonal U and

								// V, we store these quantities in additional memory during bidiagonalization.

								//

								// For matrices with randomly generated values in [0,1], the unit tests

								// produce the following information for N-by-N matrices.

								//

								// N  |A|     |E|        |E|/|A|    iterations

								// -------------------------------------------

								//  2  1.4831 4.1540e-16 2.8007e-16  1

								//  3  2.1065 3.5024e-16 1.6626e-16  4

								//  4  2.4979 7.4605e-16 2.9867e-16  6

								//  5  3.6591 1.8305e-15 5.0025e-16  9

								//  6  4.0572 2.0571e-15 5.0702e-16 10

								//  7  4.7745 2.9057e-15 6.0859e-16 12

								//  8  5.1964 2.7958e-15 5.3803e-16 13

								//  9  5.7599 3.3128e-15 5.7514e-16 16

								// 10  6.2700 3.7209e-15 5.9344e-16 16

								// 11  6.8220 5.0580e-15 7.4142e-16 18

								// 12  7.4540 5.2493e-15 7.0422e-16 21

								// 13  8.1225 5.6043e-15 6.8997e-16 24

								// 14  8.5883 5.8553e-15 6.8177e-16 26

								// 15  9.1337 6.9663e-15 7.6270e-16 27

								// 16  9.7884 9.1358e-15 9.3333e-16 29

								// 17 10.2407 8.2715e-15 8.0771e-16 34

								// 18 10.7147 8.9748e-15 8.3761e-16 33

								// 19 11.1887 1.0094e-14 9.0220e-16 32

								// 20 11.7739 9.7000e-15 8.2386e-16 35

								// 21 12.2822 1.1217e-14 9.1329e-16 36

								// 22 12.7649 1.1071e-14 8.6732e-16 37

								// 23 13.3366 1.1271e-14 8.4513e-16 41

								// 24 13.8505 1.2806e-14 9.2460e-16 43

								// 25 14.4332 1.3081e-14 9.0637e-16 43

								// 26 14.9301 1.4882e-14 9.9680e-16 46

								// 27 15.5214 1.5571e-14 1.0032e-15 48

								// 28 16.1029 1.7553e-14 1.0900e-15 49

								// 29 16.6407 1.6219e-14 9.7467e-16 53

								// 30 17.1891 1.8560e-14 1.0797e-15 55

								// 31 17.7773 1.8522e-14 1.0419e-15 56

								//

								// The singularvalues and |E|/|A| values were compared to those generated by

								// Mathematica Version 9.0; Wolfram Research, Inc., Champaign IL, 2012.

								// The results were all comparable with singular values agreeing to a large

								// number of decimal places.

								//

								// Timing on an Intel (R) Core (TM) i7-3930K CPU @ 3.20 GHZ (in seconds)

								// for NxN matrices:

								//

								// N    |E|/|A|    iters bidiag  QR     U-and-V    comperr

								// -------------------------------------------------------

								//  512 3.8632e-15  848   0.341  0.016    1.844      2.203

								// 1024 5.6456e-15 1654   4.279  0.032   18.765     20.844

								// 2048 7.5499e-15 3250  40.421  0.141  186.607    213.216

								//

								// where iters is the number of QR steps taken, bidiag is the computation

								// of the Householder reflection vectors, U-and-V is the composition of

								// Householder reflections and Givens rotations to obtain the orthogonal

								// matrices of the decomposigion, and comperr is the computation E =

								// U^T*A*V - S.


								namespace gte

								{

								    template <typename Real>

								    class SingularValueDecomposition

								    {

								    public:

								        // The solver processes MxN symmetric matrices, where M >= N > 1

								        // ('numRows' is M and 'numCols' is N) and the matrix is stored in

								        // row-major order.  The maximum number of iterations

								        // ('maxIterations') must be specified for the reduction of a

								        // bidiagonal matrix to a diagonal matrix.  The goal is to compute

								        // MxM orthogonal U, NxN orthogonal V and MxN matrix S for which

								        // U^T*A*V = S.  The only nonzero entries of S are on the diagonal;

								        // the diagonal entries are the singular values of the original

								        // matrix.

								        SingularValueDecomposition(int numRows, int numCols, unsigned int maxIterations)

								            :

								            mNumRows(0),

								            mNumCols(0),

								            mMaxIterations(0)

								        {

								            if (numCols > 1 && numRows >= numCols && maxIterations > 0)

								            {

								                mNumRows = numRows;

								                mNumCols = numCols;

								                mMaxIterations = maxIterations;

								                mMatrix.resize(numRows * numCols);

								                mDiagonal.resize(numCols);

								                mSuperdiagonal.resize(numCols - 1);

								                mRGivens.reserve(maxIterations * (numCols - 1));

								                mLGivens.reserve(maxIterations * (numCols - 1));

								                mFixupDiagonal.resize(numCols);

								                mPermutation.resize(numCols);

								                mVisited.resize(numCols);

								                mTwoInvUTU.resize(numCols);

								                mTwoInvVTV.resize(numCols - 2);

								                mUVector.resize(numRows);

								                mVVector.resize(numCols);

								                mWVector.resize(numRows);

								            }

								        }


								        // A copy of the MxN input is made internally.  The order of the

								        // singular values is specified by sortType: -1 (decreasing),

								        // 0 (no sorting), or +1 (increasing).  When sorted, the columns of

								        // the orthogonal matrices are ordered accordingly.  The return value

								        // is the number of iterations consumed when convergence occurred,

								        // 0xFFFFFFFF when convergence did not occur or 0 when N <= 1 or

								        // M < N was passed to the constructor.

								        unsigned int Solve(Real const* input, int sortType)

								        {

								            if (mNumRows > 0)

								            {

								                int numElements = mNumRows * mNumCols;

								                std::copy(input, input + numElements, mMatrix.begin());

								                Bidiagonalize();


								                // Compute 'threshold = multiplier*epsilon*|B|' as the

								                // threshold for diagonal entries effectively zero; that is,

								                // |d| <= |threshold| implies that d is (effectively) zero.

								                // TODO: Allow the caller to pass 'multiplier' to the

								                // constructor.

								                //

								                // We will use the L2-norm |B|, which is the length of the

								                // elements of B treated as an NM-tuple.  The following code

								                // avoids overflow when accumulating the squares of the

								                // elements when those elements are large.

								                Real maxAbsComp = std::fabs(input[0]);

								                for (int i = 1; i < numElements; ++i)

								                {

								                    Real absComp = std::fabs(input[i]);

								                    if (absComp > maxAbsComp)

								                    {

								                        maxAbsComp = absComp;

								                    }

								                }


								                Real norm = (Real)0;

								                if (maxAbsComp > (Real)0)

								                {

								                    Real invMaxAbsComp = ((Real)1) / maxAbsComp;

								                    for (int i = 0; i < numElements; ++i)

								                    {

								                        Real ratio = input[i] * invMaxAbsComp;

								                        norm += ratio * ratio;

								                    }

								                    norm = maxAbsComp * std::sqrt(norm);

								                }


								                Real const multiplier = (Real)8;  // TODO: Expose to caller.

								                Real const epsilon = std::numeric_limits<Real>::epsilon();

								                Real const threshold = multiplier * epsilon * norm;


								                mRGivens.clear();

								                mLGivens.clear();

								                for (unsigned int j = 0; j < mMaxIterations; ++j)

								                {

								                    int imin = -1, imax = -1;

								                    for (int i = mNumCols - 2; i >= 0; --i)

								                    {

								                        // When a01 is much smaller than its diagonal

								                        // neighbors, it is effectively zero.

								                        Real a00 = mDiagonal[i];

								                        Real a01 = mSuperdiagonal[i];

								                        Real a11 = mDiagonal[i + 1];

								                        Real sum = std::fabs(a00) + std::fabs(a11);

								                        if (sum + std::fabs(a01) != sum)

								                        {

								                            if (imax == -1)

								                            {

								                                imax = i;

								                            }

								                            imin = i;

								                        }

								                        else

								                        {

								                            // The superdiagonal term is effectively zero

								                            // compared to the neighboring diagonal terms.

								                            if (imin >= 0)

								                            {

								                                break;

								                            }

								                        }

								                    }


								                    if (imax == -1)

								                    {

								                        // The algorithm has converged.

								                        EnsureNonnegativeDiagonal();

								                        ComputePermutation(sortType);

								                        return j;

								                    }


								                    // We need to test diagonal entries of B for zero.  For

								                    // each zero diagonal entry, zero the superdiagonal.

								                    if (DiagonalEntriesNonzero(imin, imax, threshold))

								                    {

								                        // Process the lower-right-most unreduced bidiagonal

								                        // block.

								                        DoGolubKahanStep(imin, imax);

								                    }

								                }

								                return 0xFFFFFFFF;

								            }

								            else

								            {

								                return 0;

								            }

								        }


								        // Get the singular values of the matrix passed to Solve(...).  The

								        // input 'singularValues' must have N elements.

								        void GetSingularValues(Real* singularValues) const

								        {

								            if (singularValues && mNumCols > 0)

								            {

								                if (mPermutation[0] >= 0)

								                {

								                    // Sorting was requested.

								                    for (int i = 0; i < mNumCols; ++i)

								                    {

								                        int p = mPermutation[i];

								                        singularValues[i] = mDiagonal[p];

								                    }

								                }

								                else

								                {

								                    // Sorting was not requested.

								                    size_t numBytes = mNumCols * sizeof(Real);

								                    std::memcpy(singularValues, &mDiagonal[0], numBytes);

								                }

								            }

								        }


								        // Accumulate the Householder reflections, the Givens rotations, and

								        // the diagonal fix-up matrix to compute the orthogonal matrices U and

								        // V for which U^T*A*V = S.  The input uMatrix must be MxM and the

								        // input vMatrix must be NxN, both stored in row-major order.

								        void GetU(Real* uMatrix) const

								        {

								            if (!uMatrix || mNumCols == 0)

								            {

								                // Invalid input or the constructor failed.

								                return;

								            }


								            // Start with the identity matrix.

								            std::fill(uMatrix, uMatrix + mNumRows * mNumRows, (Real)0);

								            for (int d = 0; d < mNumRows; ++d)

								            {

								                uMatrix[d + mNumRows * d] = (Real)1;

								            }


								            // Multiply the Householder reflections using backward

								            // accumulation.

								            int r, c;

								            for (int i0 = mNumCols - 1, i1 = i0 + 1; i0 >= 0; --i0, --i1)

								            {

								                // Copy the u vector and 2/Dot(u,u) from the matrix.

								                Real twoinvudu = mTwoInvUTU[i0];

								                Real const* column = &mMatrix[i0];

								                mUVector[i0] = (Real)1;

								                for (r = i1; r < mNumRows; ++r)

								                {

								                    mUVector[r] = column[mNumCols * r];

								                }


								                // Compute the w vector.

								                mWVector[i0] = twoinvudu;

								                for (r = i1; r < mNumRows; ++r)

								                {

								                    mWVector[r] = (Real)0;

								                    for (c = i1; c < mNumRows; ++c)

								                    {

								                        mWVector[r] += mUVector[c] * uMatrix[r + mNumRows * c];

								                    }

								                    mWVector[r] *= twoinvudu;

								                }


								                // Update the matrix, U <- U - u*w^T.

								                for (r = i0; r < mNumRows; ++r)

								                {

								                    for (c = i0; c < mNumRows; ++c)

								                    {

								                        uMatrix[c + mNumRows * r] -= mUVector[r] * mWVector[c];

								                    }

								                }

								            }


								            // Multiply the Givens rotations.

								            for (auto const& givens : mLGivens)

								            {

								                int j0 = givens.index0;

								                int j1 = givens.index1;

								                for (r = 0; r < mNumRows; ++r, j0 += mNumRows, j1 += mNumRows)

								                {

								                    Real& q0 = uMatrix[j0];

								                    Real& q1 = uMatrix[j1];

								                    Real prd0 = givens.cs * q0 - givens.sn * q1;

								                    Real prd1 = givens.sn * q0 + givens.cs * q1;

								                    q0 = prd0;

								                    q1 = prd1;

								                }

								            }


								            if (mPermutation[0] >= 0)

								            {

								                // Sorting was requested.

								                std::fill(mVisited.begin(), mVisited.end(), 0);

								                for (c = 0; c < mNumCols; ++c)

								                {

								                    if (mVisited[c] == 0 && mPermutation[c] != c)

								                    {

								                        // The item starts a cycle with 2 or more elements.

								                        int start = c, current = c, next;

								                        for (r = 0; r < mNumRows; ++r)

								                        {

								                            mWVector[r] = uMatrix[c + mNumRows * r];

								                        }

								                        while ((next = mPermutation[current]) != start)

								                        {

								                            mVisited[current] = 1;

								                            for (r = 0; r < mNumRows; ++r)

								                            {

								                                uMatrix[current + mNumRows * r] =

								                                    uMatrix[next + mNumRows * r];

								                            }

								                            current = next;

								                        }

								                        mVisited[current] = 1;

								                        for (r = 0; r < mNumRows; ++r)

								                        {

								                            uMatrix[current + mNumRows * r] = mWVector[r];

								                        }

								                    }

								                }

								            }

								        }


								        void GetV(Real* vMatrix) const

								        {

								            if (!vMatrix || mNumCols == 0)

								            {

								                // Invalid input or the constructor failed.

								                return;

								            }


								            // Start with the identity matrix.

								            std::fill(vMatrix, vMatrix + mNumCols * mNumCols, (Real)0);

								            for (int d = 0; d < mNumCols; ++d)

								            {

								                vMatrix[d + mNumCols * d] = (Real)1;

								            }


								            // Multiply the Householder reflections using backward accumulation.

								            int i0 = mNumCols - 3;

								            int i1 = i0 + 1;

								            int i2 = i0 + 2;

								            int r, c;

								            for (/**/; i0 >= 0; --i0, --i1, --i2)

								            {

								                // Copy the v vector and 2/Dot(v,v) from the matrix.

								                Real twoinvvdv = mTwoInvVTV[i0];

								                Real const* row = &mMatrix[mNumCols * i0];

								                mVVector[i1] = (Real)1;

								                for (r = i2; r < mNumCols; ++r)

								                {

								                    mVVector[r] = row[r];

								                }


								                // Compute the w vector.

								                mWVector[i1] = twoinvvdv;

								                for (r = i2; r < mNumCols; ++r)

								                {

								                    mWVector[r] = (Real)0;

								                    for (c = i2; c < mNumCols; ++c)

								                    {

								                        mWVector[r] += mVVector[c] * vMatrix[r + mNumCols * c];

								                    }

								                    mWVector[r] *= twoinvvdv;

								                }


								                // Update the matrix, V <- V - v*w^T.

								                for (r = i1; r < mNumCols; ++r)

								                {

								                    for (c = i1; c < mNumCols; ++c)

								                    {

								                        vMatrix[c + mNumCols * r] -= mVVector[r] * mWVector[c];

								                    }

								                }

								            }


								            // Multiply the Givens rotations.

								            for (auto const& givens : mRGivens)

								            {

								                int j0 = givens.index0;

								                int j1 = givens.index1;

								                for (c = 0; c < mNumCols; ++c, j0 += mNumCols, j1 += mNumCols)

								                {

								                    Real& q0 = vMatrix[j0];

								                    Real& q1 = vMatrix[j1];

								                    Real prd0 = givens.cs * q0 - givens.sn * q1;

								                    Real prd1 = givens.sn * q0 + givens.cs * q1;

								                    q0 = prd0;

								                    q1 = prd1;

								                }

								            }


								            // Fix-up the diagonal.

								            for (r = 0; r < mNumCols; ++r)

								            {

								                for (c = 0; c < mNumCols; ++c)

								                {

								                    vMatrix[c + mNumCols * r] *= mFixupDiagonal[c];

								                }

								            }


								            if (mPermutation[0] >= 0)

								            {

								                // Sorting was requested.

								                std::fill(mVisited.begin(), mVisited.end(), 0);

								                for (c = 0; c < mNumCols; ++c)

								                {

								                    if (mVisited[c] == 0 && mPermutation[c] != c)

								                    {

								                        // The item starts a cycle with 2 or more elements.

								                        int start = c, current = c, next;

								                        for (r = 0; r < mNumCols; ++r)

								                        {

								                            mWVector[r] = vMatrix[c + mNumCols * r];

								                        }

								                        while ((next = mPermutation[current]) != start)

								                        {

								                            mVisited[current] = 1;

								                            for (r = 0; r < mNumCols; ++r)

								                            {

								                                vMatrix[current + mNumCols * r] =

								                                    vMatrix[next + mNumCols * r];

								                            }

								                            current = next;

								                        }

								                        mVisited[current] = 1;

								                        for (r = 0; r < mNumCols; ++r)

								                        {

								                            vMatrix[current + mNumCols * r] = mWVector[r];

								                        }

								                    }

								                }

								            }

								        }


								        // Compute a single column of U or V.  The reflections and rotations

								        // are applied incrementally.  This is useful when you want only a

								        // small number of the singular values or vectors.

								        void GetUColumn(int index, Real* uColumn) const

								        {

								            if (0 <= index && index < mNumRows)

								            {

								                // y = H*x, then x and y are swapped for the next H

								                Real* x = uColumn;

								                Real* y = &mWVector[0];


								                // Start with the Euclidean basis vector.

								                std::memset(x, 0, mNumRows * sizeof(Real));

								                if (index < mNumCols && mPermutation[0] >= 0)

								                {

								                    // Sorting was requested.

								                    x[mPermutation[index]] = (Real)1;

								                }

								                else

								                {

								                    x[index] = (Real)1;

								                }


								                // Apply the Givens rotations.

								                for (auto const& givens : gte::reverse(mLGivens))

								                {

								                    Real& xr0 = x[givens.index0];

								                    Real& xr1 = x[givens.index1];

								                    Real tmp0 = givens.cs * xr0 + givens.sn * xr1;

								                    Real tmp1 = -givens.sn * xr0 + givens.cs * xr1;

								                    xr0 = tmp0;

								                    xr1 = tmp1;

								                }


								                // Apply the Householder reflections.

								                for (int c = mNumCols - 1; c >= 0; --c)

								                {

								                    // Get the Householder vector u.

								                    int r;

								                    for (r = 0; r < c; ++r)

								                    {

								                        y[r] = x[r];

								                    }


								                    // Compute s = Dot(x,u) * 2/u^T*u.

								                    Real s = x[r];  // r = c, u[r] = 1

								                    for (int j = r + 1; j < mNumRows; ++j)

								                    {

								                        s += x[j] * mMatrix[c + mNumCols * j];

								                    }

								                    s *= mTwoInvUTU[c];


								                    // r = c, y[r] = x[r]*u[r] - s = x[r] - s because u[r] = 1

								                    y[r] = x[r] - s;


								                    // Compute the remaining components of y.

								                    for (++r; r < mNumRows; ++r)

								                    {

								                        y[r] = x[r] - s * mMatrix[c + mNumCols * r];

								                    }


								                    std::swap(x, y);

								                }


								                // The final product is stored in x.

								                if (x != uColumn)

								                {

								                    size_t numBytes = mNumRows * sizeof(Real);

								                    std::memcpy(uColumn, x, numBytes);

								                }

								            }

								        }


								        void GetVColumn(int index, Real* vColumn) const

								        {

								            if (0 <= index && index < mNumCols)

								            {

								                // y = H*x, then x and y are swapped for the next H

								                Real* x = vColumn;

								                Real* y = &mWVector[0];


								                // Start with the Euclidean basis vector.

								                std::memset(x, 0, mNumCols * sizeof(Real));

								                if (mPermutation[0] >= 0)

								                {

								                    // Sorting was requested.

								                    int p = mPermutation[index];

								                    x[p] = mFixupDiagonal[p];

								                }

								                else

								                {

								                    x[index] = mFixupDiagonal[index];

								                }


								                // Apply the Givens rotations.

								                for (auto const& givens : gte::reverse(mRGivens))

								                {

								                    Real& xr0 = x[givens.index0];

								                    Real& xr1 = x[givens.index1];

								                    Real tmp0 = givens.cs * xr0 + givens.sn * xr1;

								                    Real tmp1 = -givens.sn * xr0 + givens.cs * xr1;

								                    xr0 = tmp0;

								                    xr1 = tmp1;

								                }


								                // Apply the Householder reflections.

								                for (int r = mNumCols - 3; r >= 0; --r)

								                {

								                    // Get the Householder vector v.

								                    int c;

								                    for (c = 0; c < r + 1; ++c)

								                    {

								                        y[c] = x[c];

								                    }


								                    // Compute s = Dot(x,v) * 2/v^T*v.

								                    Real s = x[c];  // c = r+1, v[c] = 1

								                    for (int j = c + 1; j < mNumCols; ++j)

								                    {

								                        s += x[j] * mMatrix[j + mNumCols * r];

								                    }

								                    s *= mTwoInvVTV[r];


								                    // c = r+1, y[c] = x[c]*v[c] - s = x[c] - s

								                    // because v[c] = 1

								                    y[c] = x[c] - s;


								                    // Compute the remaining components of y.

								                    for (++c; c < mNumCols; ++c)

								                    {

								                        y[c] = x[c] - s * mMatrix[c + mNumCols * r];

								                    }


								                    std::swap(x, y);

								                }


								                // The final product is stored in x.

								                if (x != vColumn)

								                {

								                    size_t numBytes = mNumCols * sizeof(Real);

								                    std::memcpy(vColumn, x, numBytes);

								                }

								            }

								        }


								        Real GetSingularValue(int index) const

								        {

								            if (0 <= index && index < mNumCols)

								            {

								                if (mPermutation[0] >= 0)

								                {

								                    // Sorting was requested.

								                    return mDiagonal[mPermutation[index]];

								                }

								                else

								                {

								                    // Sorting was not requested.

								                    return mDiagonal[index];

								                }

								            }

								            else

								            {

								                return (Real)0;

								            }

								        }


								    private:

								        // Bidiagonalize using Householder reflections.  On input, mMatrix is

								        // a copy of the input matrix and has one extra row.  On output, the

								        // diagonal and superdiagonal contain the bidiagonalized results.  The

								        // lower-triangular portion stores the essential parts of the

								        // Householder u vectors (the elements of u after the leading 1-valued

								        // component) and the upper-triangular portion stores the essential

								        // parts of the Householder v vectors.  To avoid recomputing

								        // 2/Dot(u,u) and 2/Dot(v,v), these quantities are stored in

								        // mTwoInvUTU and mTwoInvVTV.

								        void Bidiagonalize()

								        {

								            int r, c;

								            for (int i = 0, ip1 = 1; i < mNumCols; ++i, ++ip1)

								            {

								                // Compute the U-Householder vector.

								                Real length = (Real)0;

								                for (r = i; r < mNumRows; ++r)

								                {

								                    Real ur = mMatrix[i + mNumCols * r];

								                    mUVector[r] = ur;

								                    length += ur * ur;

								                }

								                Real udu = (Real)1;

								                length = std::sqrt(length);

								                if (length > (Real)0)

								                {

								                    Real& u1 = mUVector[i];

								                    Real sgn = (u1 >= (Real)0 ? (Real)1 : (Real)-1);

								                    Real invDenom = (Real)1 / (u1 + sgn * length);

								                    u1 = (Real)1;

								                    for (r = ip1; r < mNumRows; ++r)

								                    {

								                        Real& ur = mUVector[r];

								                        ur *= invDenom;

								                        udu += ur * ur;

								                    }

								                }


								                // Compute the rank-1 offset u*w^T.

								                Real invudu = (Real)1 / udu;

								                Real twoinvudu = invudu * (Real)2;

								                for (c = i; c < mNumCols; ++c)

								                {

								                    mWVector[c] = (Real)0;

								                    for (r = i; r < mNumRows; ++r)

								                    {

								                        mWVector[c] += mMatrix[c + mNumCols * r] * mUVector[r];

								                    }

								                    mWVector[c] *= twoinvudu;

								                }


								                // Update the input matrix.

								                for (r = i; r < mNumRows; ++r)

								                {

								                    for (c = i; c < mNumCols; ++c)

								                    {

								                        mMatrix[c + mNumCols * r] -= mUVector[r] * mWVector[c];

								                    }

								                }


								                if (i < mNumCols - 2)

								                {

								                    // Compute the V-Householder vectors.

								                    length = (Real)0;

								                    for (c = ip1; c < mNumCols; ++c)

								                    {

								                        Real vc = mMatrix[c + mNumCols * i];

								                        mVVector[c] = vc;

								                        length += vc * vc;

								                    }

								                    Real vdv = (Real)1;

								                    length = std::sqrt(length);

								                    if (length > (Real)0)

								                    {

								                        Real& v1 = mVVector[ip1];

								                        Real sgn = (v1 >= (Real)0 ? (Real)1 : (Real)-1);

								                        Real invDenom = (Real)1 / (v1 + sgn * length);

								                        v1 = (Real)1;

								                        for (c = ip1 + 1; c < mNumCols; ++c)

								                        {

								                            Real& vc = mVVector[c];

								                            vc *= invDenom;

								                            vdv += vc * vc;

								                        }

								                    }


								                    // Compute the rank-1 offset w*v^T.

								                    Real invvdv = (Real)1 / vdv;

								                    Real twoinvvdv = invvdv * (Real)2;

								                    for (r = i; r < mNumRows; ++r)

								                    {

								                        mWVector[r] = (Real)0;

								                        for (c = ip1; c < mNumCols; ++c)

								                        {

								                            mWVector[r] += mMatrix[c + mNumCols * r] * mVVector[c];

								                        }

								                        mWVector[r] *= twoinvvdv;

								                    }


								                    // Update the input matrix.

								                    for (r = i; r < mNumRows; ++r)

								                    {

								                        for (c = ip1; c < mNumCols; ++c)

								                        {

								                            mMatrix[c + mNumCols * r] -= mWVector[r] * mVVector[c];

								                        }

								                    }


								                    mTwoInvVTV[i] = twoinvvdv;

								                    for (c = i + 2; c < mNumCols; ++c)

								                    {

								                        mMatrix[c + mNumCols * i] = mVVector[c];

								                    }

								                }


								                mTwoInvUTU[i] = twoinvudu;

								                for (r = ip1; r < mNumRows; ++r)

								                {

								                    mMatrix[i + mNumCols * r] = mUVector[r];

								                }

								            }


								            // Copy the diagonal and subdiagonal for cache coherence in the

								            // Golub-Kahan iterations.

								            int k, ksup = mNumCols - 1, index = 0, delta = mNumCols + 1;

								            for (k = 0; k < ksup; ++k, index += delta)

								            {

								                mDiagonal[k] = mMatrix[index];

								                mSuperdiagonal[k] = mMatrix[index + 1];

								            }

								            mDiagonal[k] = mMatrix[index];

								        }


								        // A helper for generating Givens rotation sine and cosine robustly.

								        void GetSinCos(Real x, Real y, Real& cs, Real& sn)

								        {

								            // Solves sn*x + cs*y = 0 robustly.

								            Real tau;

								            if (y != (Real)0)

								            {

								                if (std::fabs(y) > std::fabs(x))

								                {

								                    tau = -x / y;

								                    sn = (Real)1 / std::sqrt((Real)1 + tau * tau);

								                    cs = sn * tau;

								                }

								                else

								                {

								                    tau = -y / x;

								                    cs = (Real)1 / std::sqrt((Real)1 + tau * tau);

								                    sn = cs * tau;

								                }

								            }

								            else

								            {

								                cs = (Real)1;

								                sn = (Real)0;

								            }

								        }


								        // Test for (effectively) zero-valued diagonal entries through all

								        // but the last. For each such entry, the B matrix decouples. Perform

								        // that decoupling.  If there are no zero-valued entries, then the

								        // Golub-Kahan step must be performed.

								        bool DiagonalEntriesNonzero(int imin, int imax, Real threshold)

								        {

								            for (int i = imin; i <= imax; ++i)

								            {

								                if (std::fabs(mDiagonal[i]) <= threshold)

								                {

								                    // Use planar rotations to case the superdiagonal entry

								                    // out of the matrix, thus producing a row of zeros.

								                    Real x, z, cs, sn;

								                    Real y = mSuperdiagonal[i];

								                    mSuperdiagonal[i] = (Real)0;

								                    for (int j = i + 1; j <= imax + 1; ++j)

								                    {

								                        x = mDiagonal[j];

								                        GetSinCos(x, y, cs, sn);

								                        mLGivens.push_back(GivensRotation(i, j, cs, sn));

								                        mDiagonal[j] = cs * x - sn * y;

								                        if (j <= imax)

								                        {

								                            z = mSuperdiagonal[j];

								                            mSuperdiagonal[j] = cs * z;

								                            y = sn * z;

								                        }

								                    }

								                    return false;

								                }

								            }

								            return true;

								        }


								        // This is Algorithm 8.3.1 in "Matrix Computations, 2nd edition" by

								        // G. H. Golub and C. F. Van Loan.

								        void DoGolubKahanStep(int imin, int imax)

								        {

								            // The implicit shift.  Compute the eigenvalue u of the

								            // lower-right 2x2 block of A = B^T*B that is closer to b11.

								            Real f0 = (imax >= (Real)1 ? mSuperdiagonal[imax - 1] : (Real)0);

								            Real d1 = mDiagonal[imax];

								            Real f1 = mSuperdiagonal[imax];

								            Real d2 = mDiagonal[imax + 1];

								            Real a00 = d1 * d1 + f0 * f0;

								            Real a01 = d1 * f1;

								            Real a11 = d2 * d2 + f1 * f1;

								            Real dif = (a00 - a11) * (Real)0.5;

								            Real sgn = (dif >= (Real)0 ? (Real)1 : (Real)-1);

								            Real a01sqr = a01 * a01;

								            Real u = a11 - a01sqr / (dif + sgn * std::sqrt(dif * dif + a01sqr));

								            Real x = mDiagonal[imin] * mDiagonal[imin] - u;

								            Real y = mDiagonal[imin] * mSuperdiagonal[imin];


								            Real a12, a21, a22, a23, cs, sn;

								            Real a02 = (Real)0;

								            int i0 = imin - 1, i1 = imin, i2 = imin + 1;

								            for (/**/; i1 <= imax; ++i0, ++i1, ++i2)

								            {

								                // Compute the Givens rotation G and save it for use in

								                // computing V in U^T*A*V = S.

								                GetSinCos(x, y, cs, sn);

								                mRGivens.push_back(GivensRotation(i1, i2, cs, sn));


								                // Update B0 = B*G.

								                if (i1 > imin)

								                {

								                    mSuperdiagonal[i0] = cs * mSuperdiagonal[i0] - sn * a02;

								                }


								                a11 = mDiagonal[i1];

								                a12 = mSuperdiagonal[i1];

								                a22 = mDiagonal[i2];

								                mDiagonal[i1] = cs * a11 - sn * a12;

								                mSuperdiagonal[i1] = sn * a11 + cs * a12;

								                mDiagonal[i2] = cs * a22;

								                a21 = -sn * a22;


								                // Update the parameters for the next Givens rotations.

								                x = mDiagonal[i1];

								                y = a21;


								                // Compute the Givens rotation G and save it for use in

								                // computing U in U^T*A*V = S.

								                GetSinCos(x, y, cs, sn);

								                mLGivens.push_back(GivensRotation(i1, i2, cs, sn));


								                // Update B1 = G^T*B0.

								                a11 = mDiagonal[i1];

								                a12 = mSuperdiagonal[i1];

								                a22 = mDiagonal[i2];

								                mDiagonal[i1] = cs * a11 - sn * a21;

								                mSuperdiagonal[i1] = cs * a12 - sn * a22;

								                mDiagonal[i2] = sn * a12 + cs * a22;


								                if (i1 < imax)

								                {

								                    a23 = mSuperdiagonal[i2];

								                    a02 = -sn * a23;

								                    mSuperdiagonal[i2] = cs * a23;


								                    // Update the parameters for the next Givens rotations.

								                    x = mSuperdiagonal[i1];

								                    y = a02;

								                }

								            }

								        }


								        // The diagonal entries are not guaranteed to be nonnegative during

								        // the construction.  After convergence to a diagonal matrix S, test

								        // for negative entries and build a diagonal matrix that reverses the

								        // sign on the S-entry.

								        void EnsureNonnegativeDiagonal()

								        {

								            for (int i = 0; i < mNumCols; ++i)

								            {

								                if (mDiagonal[i] >= (Real)0)

								                {

								                    mFixupDiagonal[i] = (Real)1;

								                }

								                else

								                {

								                    mDiagonal[i] = -mDiagonal[i];

								                    mFixupDiagonal[i] = (Real)-1;

								                }

								            }

								        }


								        // Sort the singular values and compute the corresponding permutation

								        // of the indices of the array storing the singular values.  The

								        // permutation is used for reordering the singular values and the

								        // corresponding columns of the orthogonal matrix in the calls to

								        // GetSingularValues(...) and GetOrthogonalMatrices(...).

								        void ComputePermutation(int sortType)

								        {

								            if (sortType == 0)

								            {

								                // Set a flag for GetSingularValues() and

								                // GetOrthogonalMatrices() to know that sorted output was not

								                // requested.

								                mPermutation[0] = -1;

								                return;

								            }


								            // Compute the permutation induced by sorting.  Initially, we

								            // start with the identity permutation I = (0,1,...,N-1).

								            struct SortItem

								            {

								                Real singularValue;

								                int index;

								            };


								            std::vector<SortItem> items(mNumCols);

								            int i;

								            for (i = 0; i < mNumCols; ++i)

								            {

								                items[i].singularValue = mDiagonal[i];

								                items[i].index = i;

								            }


								            if (sortType > 0)

								            {

								                std::sort(items.begin(), items.end(),

								                    [](SortItem const& item0, SortItem const& item1)

								                    {

								                        return item0.singularValue < item1.singularValue;

								                    }

								                );

								            }

								            else

								            {

								                std::sort(items.begin(), items.end(),

								                    [](SortItem const& item0, SortItem const& item1)

								                    {

								                        return item0.singularValue > item1.singularValue;

								                    }

								                );

								            }


								            i = 0;

								            for (auto const& item : items)

								            {

								                mPermutation[i++] = item.index;

								            }


								            // GetOrthogonalMatrices() has nontrivial code for computing the

								            // orthogonal U and V from the reflections and rotations.  To

								            // avoid complicating the code further when sorting is requested,

								            // U and V are computed as in the unsorted case.  We then need to

								            // swap columns of U and V to be consistent with the sorting of

								            // the singular values.  To minimize copying due to column swaps,

								            // we use permutation P.  The minimum number of transpositions to

								            // obtain P from I is N minus the number of cycles of P.  Each

								            // cycle is reordered with a minimum number of transpositions;

								            // that is, the singular items are cyclically swapped, leading to

								            // a minimum amount of copying.  For example, if there is a

								            // cycle i0 -> i1 -> i2 -> i3, then the copying is

								            //   save = singularitem[i0];

								            //   singularitem[i1] = singularitem[i2];

								            //   singularitem[i2] = singularitem[i3];

								            //   singularitem[i3] = save;

								        }


								        // The number rows and columns of the matrices to be processed.

								        int mNumRows, mNumCols;


								        // The maximum number of iterations for reducing the bidiagonal matrix

								        // to a diagonal matrix.

								        unsigned int mMaxIterations;


								        // The internal copy of a matrix passed to the solver.  See the

								        // comments about function Bidiagonalize() about what is stored in the

								        // matrix.

								        std::vector<Real> mMatrix;  // MxN elements


								        // After the initial bidiagonalization by Householder reflections, we

								        // no longer need the full mMatrix.  Copy the diagonal and

								        // superdiagonal entries to linear arrays in order to be cache

								        // friendly.

								        std::vector<Real> mDiagonal;  // N elements

								        std::vector<Real> mSuperdiagonal;  // N-1 elements


								        // The Givens rotations used to reduce the initial bidiagonal matrix

								        // to a diagonal matrix. A rotation is the identity with the following

								        // replacement entries:  R(index0,index0) = cs, R(index0,index1) = sn,

								        // R(index1,index0) = -sn, and R(index1,index1) = cs.  If N is the

								        // number of matrix columns and K is the maximum number of iterations,

								        // the maximum number of right or left Givens rotations is K*(N-1).

								        // The maximum amount of memory is allocated to store these.  However,

								        // we also potentially need left rotations to decouple the matrix when

								        // diagonal terms are zero.  Worst case is a number of matrices

								        // quadratic in N, so for now we just use std::vector<Rotation> whose

								        // initial capacity is K*(N-1).

								        struct GivensRotation

								        {

								            // No default initialization for fast creation of std::vector of

								            // objects of this type.

								            GivensRotation() = default;


								            GivensRotation(int inIndex0, int inIndex1, Real inCs, Real inSn)

								                :

								                index0(inIndex0),

								                index1(inIndex1),

								                cs(inCs),

								                sn(inSn)

								            {

								            }


								            int index0, index1;

								            Real cs, sn;

								        };


								        std::vector<GivensRotation> mRGivens;

								        std::vector<GivensRotation> mLGivens;


								        // The diagonal matrix that is used to convert S-entries to

								        // nonnegative.

								        std::vector<Real> mFixupDiagonal;  // N elements


								        // When sorting is requested, the permutation associated with the sort

								        // is stored in mPermutation.  When sorting is not requested,

								        // mPermutation[0] is set to -1.  mVisited is used for finding cycles

								        // in the permutation.

								        std::vector<int> mPermutation;  // N elements

								        mutable std::vector<int> mVisited;  // N elements


								        // Temporary storage to compute Householder reflections and to support

								        // sorting of columns of the orthogonal matrices.

								        std::vector<Real> mTwoInvUTU;  // N elements

								        std::vector<Real> mTwoInvVTV;  // N-2 elements

								        mutable std::vector<Real> mUVector;  // M elements

								        mutable std::vector<Real> mVVector;  // N elements

								        mutable std::vector<Real> mWVector;  // max(M,N) elements

								    };

								}