Search packages:
 Sourcecode: octave-statistics version 1.0.61.0.6-11.0.71.0.81.0.8-11.0.91.0.9-1

# anderson_darling_test.m

## [q,Asq,info] = anderson_darling_test(x,uniform|normal|exponential)
##
## Test the hypothesis that x is selected from the given distribution
## using the Anderson-Darling test.  If the returned q is small, reject
## the hypothesis at the q*100% level.
##
## The Anderson-Darling A^2 statistic is calculated as follows:
##
##    A^2_n = -n - \sum_{i=1}^n (2i-1)/n log(z_i (1-z_{n-i+1}))
##
## where z_i is the ordered position of the x's in the CDF of the
## distribution.  Unlike the Kolmogorov-Smirnov statistic, the
## Anderson-Darling statistic is sensitive to the tails of the
## distribution.
##
## For 'normal' and 'exponential' distributions, estimate the
## distribution parameters from the data, convert the values
## to CDF values, and compare the result to tabluated critical
## values.  This includes an correction for small n which
## works well enough for n >= 8, but less so from smaller n.  The
## returned info.Asq_corrected contains the adjusted statistic.
##
## For 'uniform', assume the values are uniformly distributed
## in (0,1), compute A^2 and return the corresponding p value from
## 1-anderson_darling_cdf(A^2,n).
##
## If you are selecting from a known distribution, convert your
## values into CDF values for the distribution and use 'uniform'.
## Do not use 'uniform' if the distribution parameters are estimated
## from the data itself, as this sharply biases the A^2 statistic
## toward smaller values.
##
## [1] Stephens, MA; (1986), "Tests based on EDF statistics", in
## D'Agostino, RB; Stephens, MA; (eds.) Goodness-of-fit Techinques.
## New York: Dekker.

## Author: Paul Kienzle
## This program is granted to the public domain.
function [q,Asq,info] = anderson_darling_test(x,dist)

if size(x,1) == 1, x=x(:); end
x = sort(x);
n = size(x,1);
use_cdf = 0;

# Compute adjustment and critical values to use for stats.
switch dist
case 'normal',
# This expression for adj is used in R.
# Note that the values from NIST dataplot don't work nearly as well.
adj = 1 + (.75 + 2.25/n)/n;
qvals = ;
Acrit = ;
x = stdnormal_cdf(zscore(x));

case 'uniform',
## Put invalid data at the limits of the distribution
## This will drive the statistic to infinity.
x(x<0) = 0;
x(x>1) = 1;
qvals = ;
Acrit = ;
use_cdf = 1;

case 'XXXweibull',
qvals = ;
Acrit = ;
## XXX FIXME XXX how to fit alpha and sigma?
x = weibull_cdf(x,ones(n,1)*alpha,ones(n,1)*sigma);

case 'exponential',
qvals = ;
# Critical values depend on n.  Choose the appropriate critical set.
# These values come from NIST dataplot/src/dp8.f.
Acritn = ;
# FIXME: consider interpolating in the critical value table.
Acrit = Acritn(lookup(Acritn(:,1),n),2:5);

lambda = 1./mean(x);  # exponential parameter estimation
x = exponential_cdf(x,ones(n,1)*lambda);

otherwise
# FIXME consider implementing more of distributions; a number
# of them are defined in NIST dataplot/src/dp8.f.
error("Anderson-Darling test for %s not implemented", dist);
end

if any(x<0 | x>1)
error('Anderson-Darling test requires data in CDF form');
end

i = [1:n]'*ones(1,size(x,2));
Asq = -n - sum( (2*i-1) .* (log(x) + log(1-x(n:-1:1,:))) )/n;

# Lookup adjusted critical value in the cdf (if uniform) or in the
# the critical table.
if use_cdf
else
q = (idx);
end

if nargout > 2,
info.Asq = Asq;
info.Asq_critical = ';
info.p = 1-q;
info.p_is_precise = use_cdf;
end

%!demo
%! c = anderson_darling_test(10*rande(12,10000),'exponential');
%! tabulate(100*c,100*[unique(c),1]);
%! % The Fc column should report 100, 250, 500, 1000, 10000 more or less.

%!demo
%! c = anderson_darling_test(randn(12,10000),'normal');
%! tabulate(100*c,100*[unique(c),1]);
%! % The Fc column should report 100, 250, 500, 1000, 10000 more or less.

%!demo
%! c = anderson_darling_test(rand(12,10000),'uniform');
%! hist(100*c,1:2:99);
%! % The histogram should be flat more or less.


Generated by  Doxygen 1.6.0   Back to index