Source code for dlnpyutils.ladfit

#!/usr/bin/env python
#
# LADFIT.PY - robust linear fitting.
#

from __future__ import print_function

__authors__ = 'David Nidever <dnidever@montana.edu>'
__version__ = '20200321'  # yyyymmdd

import numpy as np
import warnings

# Ignore these warnings, it's a bug
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")


[docs]def ladmdfunc(b, x, y, eps=1e-7):
    a = np.median(y - b*x)
    d = y - (b * x + a)
    absdev = np.sum(np.abs(d))
    nz, = np.where(y != 0.0)
    nzcount = len(nz)
    if nzcount != 0:
        d[nz] = d[nz] / np.abs(y[nz]) #Normalize
    nz, = np.where(np.abs(d) > eps)
    nzcount = len(nz)
    if nzcount != 0:        #Sign fcn, +1 for d > 0, -1 for d < 0, else 0.
        return np.sum(x[nz] * ((d[nz] > 0)*1 - (d[nz] < 0)*1)), a, absdev
    else:
        return 0.0, a, absdev

#-------------------------------------------------------------------------
[docs]def ladfit(xin, yin):
    """
    Copyright (c) 1994-2015, Exelis Visual Information Solutions, Inc. All
    rights reserved. Unauthorized reproduction is prohibited.

    LADFIT

    This function fits the paired data {X(i), Y(i)} to the linear model,
    y = A + Bx, using a "robust" least absolute deviation method. The
    result is a two-element vector containing the model parameters, A
    and B.


    Result = LADFIT(X, Y)

    Parameters
    ----------
       X:    An n-element vector of type integer, float or double.

       Y:    An n-element vector of type integer, float or double.

    EXAMPLE:
       Define two n-element vectors of paired data.
         x = [-3.20, 4.49, -1.66, 0.64, -2.43, -0.89, -0.12, 1.41, $
               2.95, 2.18,  3.72, 5.26]
         y = [-7.14, -1.30, -4.26, -1.90, -6.19, -3.98, -2.87, -1.66, $
              -0.78, -2.61,  0.31,  1.74]
       Compute the model parameters, A and B.
         result = ladfit(x, y, absdev = absdev)
       The result should be the two-element vector:
         [-3.15301, 0.930440]
       The keyword parameter should be returned as:
         absdev = 0.636851

    REFERENCE:
       Numerical Recipes, The Art of Scientific Computing (Second Edition)
       Cambridge University Press, 2nd Edition.
       ISBN 0-521-43108-5
    This is adapted from the routine MEDFIT described in:
    Fitting a Line by Minimizing Absolute Deviation, Page 703.

    MODIFICATION HISTORY:
      Written by:  GGS, RSI, September 1994
      Modified:    GGS, RSI, July 1995
                    Corrected an infinite loop condition that occured when
                    the X input parameter contained mostly negative data.
      Modified:    GGS, RSI, October 1996
                    If least-absolute-deviation convergence condition is not
                    satisfied, the algorithm switches to a chi-squared model.
                    Modified keyword checking and use of double precision.
      Modified:    GGS, RSI, November 1996
                    Fixed an error in the computation of the median with
                    even-length input data. See EVEN keyword to MEDIAN.
      Modified:    DMS, RSI, June 1997
         Simplified logic, remove SIGN and MDfunc2 functions.
      Modified:    RJF, RSI, Jan 1999
         Fixed the variance computation by adding some double
         conversions.  This prevents the function from generating
         NaNs on some specific datasets (bug 11680).
       Modified: CT, RSI, July 2002: Convert inputs to float or double.
            Change constants to double precision if necessary.
       CT, March 2004: Check for quick return if we found solution.
    """
    
    nx = len(xin)
  
    if nx != len(yin):
        raise ValuError("X and Y must be vectors of equal length.")

    x = np.float64(xin)
    y = np.float64(yin)

    sx = np.sum(x)
    sy = np.sum(y)

    #  the variance computation is sensitive to roundoff, so we do this
    #  math in DP
    sxy = np.sum(x*y)
    sxx = np.sum(x*x)
    delx = nx * sxx - sx**2

    if (delx == 0.0):         #All X's are the same
        result = [np.median(y), 0.0] #Bisect the range w/ a flat line
        absdev = np.sum(np.abs(y-np.median(y)))/nx
        return np.array(result), absdev

    aa = (sxx * sy - sx * sxy) / delx #Least squares solution y = x * aa + bb
    bb = (nx * sxy - sx * sy) / delx
    chisqr = np.sum((y - (aa + bb*x))**2)
    sigb = np.sqrt(chisqr / delx)     #Standard deviation
    
    b1 = bb
    eps = 1e-7
    f1,aa,absdev = ladmdfunc(b1, x, y, eps=eps)

    # Quick return. The initial least squares gradient is the LAD solution.
    if (f1 == 0.):
        bb = b1
        absdev = absdev / nx
        return np.array([aa, bb],float), absdev

    #delb = ((f1 >= 0) ? 3.0 : -3.0) * sigb
    delb = 3.0*sigb if (f1 >= 0) else -3.0*sigb
    
    b2 = b1 + delb
    f2,aa,absdev = ladmdfunc(b2, x, y, eps=eps)

    while (f1*f2 > 0):     #Bracket the zero of the function
        b1 = b2
        f1 = f2
        b2 = b1 + delb
        f2,aa,absdev = ladmdfunc(b2, x, y, eps=eps)


    # In case we finish early.
    bb = b2
    f = f2

    #Narrow tolerance to refine 0 of fcn.
    sigb = 0.01 * sigb

    while ((np.abs(b2-b1) > sigb) and (f != 0)): #bisection of interval b1,b2.
        bb = 0.5 * (b1 + b2)
        if (bb == b1 or bb == b2):
            break
        f,aa,absdev = ladmdfunc(bb, x, y, eps=eps)
        if (f*f1 >= 0):
            f1 = f
            b1 = bb
        else:
            f2 = f
            b2 = bb

    absdev = absdev / nx

    return np.array([aa, bb],float), absdev
Navigation

Source code for dlnpyutils.ladfit