Logo Search packages:      
Sourcecode: octave-statistics version File versions

anderson_darling_test.m

## [q,Asq,info] = anderson_darling_test(x,uniform|normal|exponential)
##
## Test the hypothesis that x is selected from the given distribution
## using the Anderson-Darling test.  If the returned q is small, reject
## the hypothesis at the q*100% level.
##
## The Anderson-Darling A^2 statistic is calculated as follows:
##
##    A^2_n = -n - \sum_{i=1}^n (2i-1)/n log(z_i (1-z_{n-i+1}))
##
## where z_i is the ordered position of the x's in the CDF of the 
## distribution.  Unlike the Kolmogorov-Smirnov statistic, the 
## Anderson-Darling statistic is sensitive to the tails of the 
## distribution.
##
## For 'normal' and 'exponential' distributions, estimate the 
## distribution parameters from the data, convert the values 
## to CDF values, and compare the result to tabluated critical 
## values.  This includes an correction for small n which 
## works well enough for n >= 8, but less so from smaller n.  The
## returned info.Asq_corrected contains the adjusted statistic.
##
## For 'uniform', assume the values are uniformly distributed
## in (0,1), compute A^2 and return the corresponding p value from
## 1-anderson_darling_cdf(A^2,n).
## 
## If you are selecting from a known distribution, convert your 
## values into CDF values for the distribution and use 'uniform'.
## Do not use 'uniform' if the distribution parameters are estimated 
## from the data itself, as this sharply biases the A^2 statistic
## toward smaller values.
##
## [1] Stephens, MA; (1986), "Tests based on EDF statistics", in
## D'Agostino, RB; Stephens, MA; (eds.) Goodness-of-fit Techinques.
## New York: Dekker.

## Author: Paul Kienzle
## This program is granted to the public domain.
function [q,Asq,info] = anderson_darling_test(x,dist)

  if size(x,1) == 1, x=x(:); end
  x = sort(x);
  n = size(x,1);
  use_cdf = 0;

  # Compute adjustment and critical values to use for stats.
  switch dist
    case 'normal',
      # This expression for adj is used in R.
      # Note that the values from NIST dataplot don't work nearly as well.
      adj = 1 + (.75 + 2.25/n)/n;
      qvals = ;
      Acrit = ;
      x = stdnormal_cdf(zscore(x));

    case 'uniform',
      ## Put invalid data at the limits of the distribution
      ## This will drive the statistic to infinity.
      x(x<0) = 0;
      x(x>1) = 1;
      adj = 1.;
      qvals = ;
      Acrit = ;
      use_cdf = 1;

    case 'XXXweibull',
      adj = 1 + 0.2/sqrt(n);
      qvals = ;
      Acrit = ;
      ## XXX FIXME XXX how to fit alpha and sigma?
      x = weibull_cdf(x,ones(n,1)*alpha,ones(n,1)*sigma);

    case 'exponential',
      adj = 1 + 0.6/n;
      qvals = ;
      # Critical values depend on n.  Choose the appropriate critical set.
      # These values come from NIST dataplot/src/dp8.f.
      Acritn = ;
      # FIXME: consider interpolating in the critical value table.
      Acrit = Acritn(lookup(Acritn(:,1),n),2:5);

      lambda = 1./mean(x);  # exponential parameter estimation
      x = exponential_cdf(x,ones(n,1)*lambda);

    otherwise
      # FIXME consider implementing more of distributions; a number
      # of them are defined in NIST dataplot/src/dp8.f.
      error("Anderson-Darling test for %s not implemented", dist);
  end

  if any(x<0 | x>1)
    error('Anderson-Darling test requires data in CDF form');
  end

  i = [1:n]'*ones(1,size(x,2));
  Asq = -n - sum( (2*i-1) .* (log(x) + log(1-x(n:-1:1,:))) )/n;

  # Lookup adjusted critical value in the cdf (if uniform) or in the
  # the critical table.
  if use_cdf
    q = 1-anderson_darling_cdf(Asq*adj, n);
  else
    idx = lookup([-Inf,Acrit],Asq*adj);
    q = (idx); 
  end

  if nargout > 2,
    info.Asq = Asq;
    info.Asq_corrected = Asq*adj;
    info.Asq_critical = ';
    info.p = 1-q;
    info.p_is_precise = use_cdf;
  end


%!demo
%! c = anderson_darling_test(10*rande(12,10000),'exponential');
%! tabulate(100*c,100*[unique(c),1]);
%! % The Fc column should report 100, 250, 500, 1000, 10000 more or less.

%!demo
%! c = anderson_darling_test(randn(12,10000),'normal');
%! tabulate(100*c,100*[unique(c),1]);
%! % The Fc column should report 100, 250, 500, 1000, 10000 more or less.

%!demo
%! c = anderson_darling_test(rand(12,10000),'uniform');
%! hist(100*c,1:2:99);
%! % The histogram should be flat more or less.

Generated by  Doxygen 1.6.0   Back to index