Source code for prometheus.leastsquares

#!/usr/bin/env python

"""LEASTSQUARES.PY - Least squares solvers

"""

__authors__ = 'David Nidever <dnidever@montana.edu?'
__version__ = '20210908'  # yyyymmdd


import sys
import numpy as np
import scipy

[docs]def ishermitian(A):
    """ check if a matrix is Hermitian (equal to it's conjugate transpose)."""
    return np.allclose(A, np.asmatrix(A).H)

[docs]def isposdef(A):
    """ Check if a matrix positive definite."""
    if np.array_equal(A, A.T):
        try:
            np.linalg.cholesky(A)
            return True
        except np.linalg.LinAlgError:
            return False
    else:
        return False

[docs]def inverse(a):
    """ Safely take the inverse of a square 2D matrix."""
    # This checks for zeros on the diagonal and "fixes" them.
    
    # If one of the dimensions is zero in the R matrix [Npars,Npars]
    # then replace it with a "dummy" value.  A large value in R
    # will give a small value in inverse of R.
    badpar, = np.where(np.abs(np.diag(a))<sys.float_info.min)
    if len(badpar)>0:
        a[badpar,badpar] = 1e10
    try:
        ainv = np.linalg.inv(a)
    except:
        print('inverse problem')
        import pdb; pdb.set_trace()
    # Fix values
    a[badpar,badpar] = 0  # put values back
    ainv[badpar,badpar] = 0
    
    return ainv

[docs]def checkbounds(pars,bounds):
    """ Check the parameters against the bounds."""
    # 0 means it's fine
    # 1 means it's beyond the lower bound
    # 2 means it's beyond the upper bound
    npars = len(pars)
    lbounds,ubounds = bounds
    check = np.zeros(npars,int)
    check[pars<=lbounds] = 1
    check[pars>=ubounds] = 2
    return check
        
[docs]def limbounds(pars,bounds):
    """ Limit the parameters to the boundaries."""
    lbounds,ubounds = bounds
    outpars = np.minimum(np.maximum(pars,lbounds),ubounds)
    return outpars

[docs]def limsteps(steps,maxsteps):
    """ Limit the parameter steps to maximum step sizes."""
    signs = np.sign(steps)
    outsteps = np.minimum(np.abs(steps),maxsteps)
    outsteps *= signs
    return outsteps

[docs]def newpars(pars,steps,bounds=None,maxsteps=None):
    """ Return new parameters that fit the constraints."""
    # Limit the steps to maxsteps
    if maxsteps is not None:
        limited_steps = limsteps(steps,maxsteps)
    else:
        limited_steps = steps

    # No bounds input
    if bounds is None:
        return pars+limited_steps
        
    # Make sure that these don't cross the boundaries
    lbounds,ubounds = bounds
    check = checkbounds(pars+limited_steps,bounds)
    # Reduce step size for any parameters to go beyond the boundaries
    badpars = (check!=0)
    # reduce the step sizes until they are within bounds
    newsteps = limited_steps.copy()
    count = 0
    maxiter = 2
    while (np.sum(badpars)>0 and count<=maxiter):
        newsteps[badpars] /= 2
        newcheck = checkbounds(pars+newsteps,bounds)
        badpars = (newcheck!=0)
        count += 1
            
    # Final parameters
    newpars = pars + newsteps
            
    # Make sure to limit them to the boundaries
    check = checkbounds(newpars,bounds)
    badpars = (check!=0)
    if np.sum(badpars)>0:
        # add a tiny offset so it doesn't fit right on the boundary
        newpars = np.minimum(np.maximum(newpars,lbounds+1e-30),ubounds-1e-30)
    return newpars

[docs]def lsq_solve(xdata,data,jac,initpar,error=None,method='qr',model=None,
              bounds=None,fixed=None,steps=None,maxiter=20,minpercdiff=0.5,
              verbose=False):
    """
    Solve a non-linear problem with least squares.
    
    xdata : list or numpy array
        x and y values of the data.
    data : numpy array
       Data values.
    jac : function
       Jacobian function.  If model is not input then this is assumed to return
         *both* the model and jacobian.
    initpar : numpy array
       Initial guess parameters.
    error : numpy array, optional
       Uncertainties in data.
    method : str, optional
       Method to use for solving the non-linear least squares problem: "cholesky",
         "qr", "svd", and "curve_fit".  Default is "qr".
    model : function, optional
       Model function.
    bounds : list, optional
       Input lower and upper bounds/constraints on the fitting parameters (tuple of two
         lists.
    fixed : boolean list, optinal
       List of boolean values if what to hold fixed and what should be free to vary.
    steps : function, optional
       Function to limit the steps to some maximum values.  Should take parameters
         and bounds.
    maxiter : int, optional
       Maximum number of iterations.  Default is 20.
    minpercdiff : float, optional
       Minimum percent change in the parameters to allow until the solution is
         considered converged and the iteration loop is stopped.  Default is 0.5.
    verbose : boolean, optional
       Verbose output to the screen.  Default is False.
    
    Returns
    -------
    pars : numpy array
       Best-fit parameters.
    perror : numpy array
       Uncertainties in best-fit parameters.
    cov : numpy array
       Covariance matrix.

    Example
    -------

    pars,perror,cov = lsq_solve(xdata,data,jac,initpar)


    """


    # ADD A FIXED PARAMETER TO HOLD CERTAIN PARAMETERS FIXED!!
    
    # Iterate
    count = 0
    bestpar = initpar.copy()
    maxpercdiff = 1e10
    if steps is not None:
        maxsteps = steps(initpar,bounds)  # maximum steps
    else:
        maxsteps = None
    if error is not None:
        wt = 1.0/error.ravel()**2
    while (count<maxiter and maxpercdiff>minpercdiff):
        # Use Cholesky, QR or SVD to solve linear system of equations
        if model is None:
            m,j = jac(xdata,*bestpar)
        else:
            m = model(xdata,*bestpar)
            j = jac(xdata,*bestpar)            
        dy = data.ravel()-m.ravel()
        # Solve Jacobian
        if error is not None:
            dbeta = jac_solve(j,dy,method=method,weight=wt)
        else:
            dbeta = jac_solve(j,dy,method=method)
        dbeta[~np.isfinite(dbeta)] = 0.0  # deal with NaNs


        # -add "shift cutting" and "line search" in the least squares method
        # basically scale the beta vector to find the best match.
        # check this out
        # https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.line_search.html
        
        # Update parameters
        oldpar = bestpar.copy()
        # limit the steps to the maximum step sizes and boundaries
        if bounds is not None or maxsteps is not None:
            bestpar = newpars(bestpar,dbeta,bounds,maxsteps)
        else:
            bestpar += dbeta
        # Check differences and changes
        diff = np.abs(bestpar-oldpar)
        denom = np.maximum(np.abs(oldpar.copy()),0.0001)
        percdiff = diff.copy()/denom*100  # percent differences
        maxpercdiff = np.max(percdiff)
                
        if verbose:
            print('N = '+str(ount))
            print('bestpars = '+str(bestpar))
            print('dbeta = '+str(dbeta))
                
        count += 1

    # Get covariance and errors
    if model is None:
        m,j = jac(xdata,*bestpar)
    else:
        m = model(xdata,*bestpar)
        j = jac(xdata,*bestpar)            
    dy = data.ravel()-m.ravel()
    if error is not None:
        cov = jac_covariance(j,dy,wt)
    else:
        cov = jac_covariance(j,dy) 
    perror = np.sqrt(np.diag(cov))

    return bestpar,perror,cov
    

[docs]def jac_solve(jac,resid,method=None,weight=None):
    """ Thin wrapper for the various jacobian solver method."""

    npix,npars = jac.shape

    #if npars==3:
    #    import pdb; pdb.set_trace()
    
    ## Check the sample covariance to make sure that each
    ## column is independent (and not zero)
    ## check if the sample covariance matrix is singular (has determinant of zero)
    # Just check if one entire column is zeros
    badpars, = np.where(np.sum(jac==0,axis=0) == npix)
    badpars = []
    usejac = jac
    if len(badpars)>0:
        if len(badpars)==npars:
            raise ValueError('All columns in the Jacobian matrix are zero')
        # Remove the offending column(s) in the Jacobian, solve and put zeros
        #  in the output dbeta array
        if len(badpars)>0:
            usejac = jac.copy()
            usejac = np.delete(usejac,badpars,axis=1)
            goodpars, = np.where(np.sum(jac==0,axis=0) != npix)
            #print('removing '+str(len(badpars))+' parameters ('+'.'.join(badpars.astype(str))+') with all zeros in jacobian')
    else:
        badpars = []
        usejac = jac

    # Solve the problem
    if method=='qr':
        dbeta = qr_jac_solve(usejac,resid,weight=weight)
    elif method=='svd':
        dbeta = svd_jac_solve(usejac,resid,weight=weight)
    elif method=='cholesky':
        dbeta = cholesky_jac_solve(usejac,resid,weight=weight)
    elif method=='lu':
        dbeta = lu_jac_solve(usejac,resid,weight=weight)        
    elif method=='sparse':
        dbeta = cholesky_jac_sparse_solve(usejac,resid,weight=weight)
    elif method=='kkt':
        dbeta = kkt_jac_solve(usejac,resid,weight=weight)        
    else:
        raise ValueError(method+' not supported')
    
    # Add back columns that were all zero
    if len(badpars)>0:
        origdbeta = dbeta.copy()
        dbeta = np.zeros(npars,float)
        dbeta[goodpars] = origdbeta
    
    return dbeta

[docs]def qr_jac_solve(jac,resid,weight=None):
    """ Solve part of a non-linear least squares equation using QR decomposition
        using the Jacobian."""
    # jac: Jacobian matrix, first derivatives, [Npix, Npars]
    # resid: residuals [Npix]
    # weight: weights, ~1/error**2 [Npix]
    
    # QR decomposition
    if weight is None:
        q,r = np.linalg.qr(jac)
        rinv = inverse(r)
        dbeta = rinv @ (q.T @ resid)
    # Weights input, multiply resid and jac by weights        
    else:
        q,r = np.linalg.qr( jac * weight.reshape(-1,1) )
        rinv = inverse(r)
        dbeta = rinv @ (q.T @ (resid*weight))
        
    return dbeta

[docs]def svd_jac_solve(jac,resid,weight=None):
    """ Solve part of a non-linear least squares equation using Single Value
        Decomposition (SVD) using the Jacobian."""
    # jac: Jacobian matrix, first derivatives, [Npix, Npars]
    # resid: residuals [Npix]

    # Precondition??
    
    # Singular Value decomposition (SVD)
    if weight is None:
        u,s,vt = np.linalg.svd(jac)
        #u,s,vt = sparse.linalg.svds(jac)
        # u: [Npix,Npix]
        # s: [Npars]
        # vt: [Npars,Npars]
        # dy: [Npix]
        sinv = s.copy()*0  # pseudo-inverse
        sinv[s!=0] = 1/s[s!=0]
        npars = len(s)
        dbeta = vt.T @ ((u.T @ resid)[0:npars]*sinv)

    # multply resid and jac by weights
    else:
        u,s,vt = np.linalg.svd( jac * weight.reshape(-1,1) )
        sinv = s.copy()*0  # pseudo-inverse
        sinv[s!=0] = 1/s[s!=0]
        npars = len(s)
        dbeta = vt.T @ ((u.T @ (resid*weight))[0:npars]*sinv)
        
    return dbeta

[docs]def cholesky_jac_sparse_solve(jac,resid,weight=None):
    """ Solve part a non-linear least squares equation using Cholesky decomposition
        using the Jacobian, with sparse matrices."""
    # jac: Jacobian matrix, first derivatives, [Npix, Npars]
    # resid: residuals [Npix]

    # Precondition??

    from scipy import sparse
    
    # Multipy dy and jac by weights
    if weight is not None:
        resid = resid * weight        
        jac = jac * weight.reshape(-1,1)

    if weight is None:
        # J * x = resid
        # J.T J x = J.T resid
        # A = (J.T @ J)
        # b = np.dot(J.T*dy)
        # J is [3*Nstar,Npix]
        # A is [3*Nstar,3*Nstar]
        jac = sparse.csc_matrix(jac)  # make it sparse
        A = jac.T @ jac
        b = jac.T.dot(resid)
        # Now solve linear least squares with sparse
        # Ax = b
        from sksparse.cholmod import cholesky
        factor = cholesky(A)
        dbeta = factor(b)

    # multply resid and jac by weights
    else:
        wjac = jac * weight.reshape(-1,1)        
        wjac = sparse.csc_matrix(wjac)  # make it sparse
        A = wjac.T @ wjac
        b = wjac.T.dot(resid*weight)
        # Now solve linear least squares with sparse
        # Ax = b
        from sksparse.cholmod import cholesky
        factor = cholesky(A)
        dbeta = factor(b)

    return dbeta

[docs]def kkt_jac_solve(jac,resid,weight=None,maxiter=None):
    """ Solve part a non-linear least squares equation using KKT (Karush-Kuhn-Tucker)
        method with the Jacobian."""
    # jac: Jacobian matrix, first derivatives, [Npix, Npars]
    # resid: residuals [Npix]

    if weight is None:
        # J * x = resid
        # J.T J x = J.T resid
        # A = (J.T @ J)
        # b = np.dot(J.T*dy)
        A = jac.T @ jac
        b = np.dot(jac.T,resid)

    # multply resid and jac by weights        
    else:
        wjac = jac * weight.reshape(-1,1)
        A = wjac.T @ wjac
        b = np.dot(wjac.T,resid*weight)        

    # Use KKT method
    x = scipy.optimize.nnls(A,b,maxiter=maxiter)

    return x

[docs]def cholesky_jac_solve(jac,resid,weight=None):
    """ Solve part a non-linear least squares equation using Cholesky decomposition
        using the Jacobian."""
    # jac: Jacobian matrix, first derivatives, [Npix, Npars]
    # resid: residuals [Npix]

    if weight is None:
        # J * x = resid
        # J.T J x = J.T resid
        # A = (J.T @ J)
        # b = np.dot(J.T*dy)
        A = jac.T @ jac
        b = np.dot(jac.T,resid)

    # multply resid and jac by weights        
    else:
        wjac = jac * weight.reshape(-1,1)
        A = wjac.T @ wjac
        b = np.dot(wjac.T,resid*weight)        

    # Now solve linear least squares with cholesky decomposition
    # Ax = b
    #  try Cholesky decomposition, but it can fail if the matrix
    #  is not positive definite
    try:
        x = cholesky_solve(A,b)
    # try LU decomposition if cholesky fails, LU works for more cases
    except:
        x = lu_solve(A,b)
    return x

[docs]def cholesky_solve(A,b):
    """ Solve linear least squares problem with Cholesky decomposition."""

    # Now solve linear least squares with cholesky decomposition
    # Ax = b
    # decompose A into L L* using cholesky decomposition
    #  L and L* are triangular matrices
    #  L* is conjugate transpose
    # solve Ly=b (where L*x=y) for y by forward substitution
    # finally solve L*x = y for x by back substitution

    #L = np.linalg.cholesky(A)
    #Lstar = L.T.conj()   # Lstar is conjugate transpose
    #y = scipy.linalg.solve_triangular(L,b)
    #x = scipy.linalg.solve_triangular(Lstar,y)

    # this gives better results, not sure why
    c, low = scipy.linalg.cho_factor(A)
    x = scipy.linalg.cho_solve((c, low), b)
    
    return x

[docs]def lu_jac_solve(jac,resid,weight=None):
    """ Solve part a non-linear least squares equation using LU decomposition
        using the Jacobian."""
    # jac: Jacobian matrix, first derivatives, [Npix, Npars]
    # resid: residuals [Npix]

    # Multipy dy and jac by weights
    if weight is not None:
        resid = resid * weight        
        jac = jac * weight.reshape(-1,1)
    
    # J * x = resid
    # J.T J x = J.T resid
    # A = (J.T @ J)
    # b = np.dot(J.T*dy)
    A = jac.T @ jac
    b = np.dot(jac.T,resid)

    # Now solve linear least squares with cholesky decomposition
    # Ax = b    
    return lu_solve(A,b)

[docs]def lu_solve(A,b):
    """ Solve linear least squares problem with LU decomposition."""

    lu, piv = scipy.linalg.lu_factor(A)
    x = scipy.linalg.lu_solve((lu, piv), b)
    
    # Solve by two back substitution problems
    # Ax = b
    # Use LU decomposition to get A=LU
    # LUx = b
    # now break into two equations
    # Ly = b, solve by forward substitution to get y
    # Ux = y, solve by backward substitution to get x
    #P,L,U = scipy.linalg.lu(A)
    #y = scipy.linalg.solve_triangular(P@L,b)
    #x = scipy.linalg.solve_triangular(U,y)
    return x
    

[docs]def jac_covariance(jac,resid,wt=None):
    """
    Determine the covariance matrix.
    
    Parameters
    ----------
    jac : numpy array
       The 2-D jacobian (first derivative relative to the parameters) array
         with dimensions [Npix,Npar].
    resid : numpy array
       Residual array (data-best model) with dimensions [Npix].
    wt : numpy array, optional
       Weight array (typically 1/sigma**2) with dimensions [Npix].

    Returns
    -------
    cov : numpy array
       Covariance array with dimensions [Npar,Npar].

    Example
    -------

    cov = jac_covariance(jac,resid,wt)

    """
    
    # https://stats.stackexchange.com/questions/93316/parameter-uncertainty-after-non-linear-least-squares-estimation
    # more background here, too: http://ceres-solver.org/nnls_covariance.html        

    # Hessian = J.T * T, Hessian Matrix
    #  higher order terms are assumed to be small
    # https://www8.cs.umu.se/kurser/5DA001/HT07/lectures/lsq-handouts.pdf

    npix,npars = jac.shape
    
    # Weights
    #   If weighted least-squares then
    #   J.T * W * J
    #   where W = I/sig_i**2
    if wt is not None:
        wt2 = wt.reshape(-1,1) + np.zeros(npars)
        hess = jac.T @ (wt2 * jac)
    else:
        hess = jac.T @ jac  # not weighted

    # cov = H-1, covariance matrix is inverse of Hessian matrix
    cov_orig = inverse(hess)

    # Rescale to get an unbiased estimate
    # cov_scaled = cov * (RSS/(m-n)), where m=number of measurements, n=number of parameters
    # RSS = residual sum of squares
    #  using rss gives values consistent with what curve_fit returns
    # Use chi-squared, since we are doing the weighted least-squares and weighted Hessian
    if wt is not None:
        chisq = np.sum(resid**2 * wt)
    else:
        chisq = np.sum(resid**2)
    cov = cov_orig * (chisq/(npix-npars))  # what MPFITFUN suggests, but very small
        
    return cov
Navigation

Source code for prometheus.leastsquares