001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.inference;
018    
019    import org.apache.commons.math.MathException;
020    import org.apache.commons.math.MathRuntimeException;
021    import org.apache.commons.math.distribution.ChiSquaredDistribution;
022    import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
023    import org.apache.commons.math.exception.util.LocalizedFormats;
024    import org.apache.commons.math.util.FastMath;
025    
026    /**
027     * Implements Chi-Square test statistics defined in the
028     * {@link UnknownDistributionChiSquareTest} interface.
029     *
030     * @version $Revision: 990655 $ $Date: 2010-08-29 23:49:40 +0200 (dim. 29 août 2010) $
031     */
032    public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
033    
034        /** Distribution used to compute inference statistics. */
035        private ChiSquaredDistribution distribution;
036    
037        /**
038         * Construct a ChiSquareTestImpl
039         */
040        public ChiSquareTestImpl() {
041            this(new ChiSquaredDistributionImpl(1.0));
042        }
043    
044        /**
045         * Create a test instance using the given distribution for computing
046         * inference statistics.
047         * @param x distribution used to compute inference statistics.
048         * @since 1.2
049         */
050        public ChiSquareTestImpl(ChiSquaredDistribution x) {
051            super();
052            setDistribution(x);
053        }
054         /**
055         * {@inheritDoc}
056         * <p><strong>Note: </strong>This implementation rescales the
057         * <code>expected</code> array if necessary to ensure that the sum of the
058         * expected and observed counts are equal.</p>
059         *
060         * @param observed array of observed frequency counts
061         * @param expected array of expected frequency counts
062         * @return chi-square test statistic
063         * @throws IllegalArgumentException if preconditions are not met
064         * or length is less than 2
065         */
066        public double chiSquare(double[] expected, long[] observed)
067            throws IllegalArgumentException {
068            if (expected.length < 2) {
069                throw MathRuntimeException.createIllegalArgumentException(
070                      LocalizedFormats.INSUFFICIENT_DIMENSION, expected.length, 2);
071            }
072            if (expected.length != observed.length) {
073                throw MathRuntimeException.createIllegalArgumentException(
074                      LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, expected.length, observed.length);
075            }
076            checkPositive(expected);
077            checkNonNegative(observed);
078            double sumExpected = 0d;
079            double sumObserved = 0d;
080            for (int i = 0; i < observed.length; i++) {
081                sumExpected += expected[i];
082                sumObserved += observed[i];
083            }
084            double ratio = 1.0d;
085            boolean rescale = false;
086            if (FastMath.abs(sumExpected - sumObserved) > 10E-6) {
087                ratio = sumObserved / sumExpected;
088                rescale = true;
089            }
090            double sumSq = 0.0d;
091            for (int i = 0; i < observed.length; i++) {
092                if (rescale) {
093                    final double dev = observed[i] - ratio * expected[i];
094                    sumSq += dev * dev / (ratio * expected[i]);
095                } else {
096                    final double dev = observed[i] - expected[i];
097                    sumSq += dev * dev / expected[i];
098                }
099            }
100            return sumSq;
101        }
102    
103        /**
104         * {@inheritDoc}
105         * <p><strong>Note: </strong>This implementation rescales the
106         * <code>expected</code> array if necessary to ensure that the sum of the
107         * expected and observed counts are equal.</p>
108         *
109         * @param observed array of observed frequency counts
110         * @param expected array of expected frequency counts
111         * @return p-value
112         * @throws IllegalArgumentException if preconditions are not met
113         * @throws MathException if an error occurs computing the p-value
114         */
115        public double chiSquareTest(double[] expected, long[] observed)
116            throws IllegalArgumentException, MathException {
117            distribution.setDegreesOfFreedom(expected.length - 1.0);
118            return 1.0 - distribution.cumulativeProbability(
119                chiSquare(expected, observed));
120        }
121    
122        /**
123         * {@inheritDoc}
124         * <p><strong>Note: </strong>This implementation rescales the
125         * <code>expected</code> array if necessary to ensure that the sum of the
126         * expected and observed counts are equal.</p>
127         *
128         * @param observed array of observed frequency counts
129         * @param expected array of expected frequency counts
130         * @param alpha significance level of the test
131         * @return true iff null hypothesis can be rejected with confidence
132         * 1 - alpha
133         * @throws IllegalArgumentException if preconditions are not met
134         * @throws MathException if an error occurs performing the test
135         */
136        public boolean chiSquareTest(double[] expected, long[] observed,
137                double alpha) throws IllegalArgumentException, MathException {
138            if ((alpha <= 0) || (alpha > 0.5)) {
139                throw MathRuntimeException.createIllegalArgumentException(
140                      LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
141                      alpha, 0, 0.5);
142            }
143            return chiSquareTest(expected, observed) < alpha;
144        }
145    
146        /**
147         * @param counts array representation of 2-way table
148         * @return chi-square test statistic
149         * @throws IllegalArgumentException if preconditions are not met
150         */
151        public double chiSquare(long[][] counts) throws IllegalArgumentException {
152    
153            checkArray(counts);
154            int nRows = counts.length;
155            int nCols = counts[0].length;
156    
157            // compute row, column and total sums
158            double[] rowSum = new double[nRows];
159            double[] colSum = new double[nCols];
160            double total = 0.0d;
161            for (int row = 0; row < nRows; row++) {
162                for (int col = 0; col < nCols; col++) {
163                    rowSum[row] += counts[row][col];
164                    colSum[col] += counts[row][col];
165                    total += counts[row][col];
166                }
167            }
168    
169            // compute expected counts and chi-square
170            double sumSq = 0.0d;
171            double expected = 0.0d;
172            for (int row = 0; row < nRows; row++) {
173                for (int col = 0; col < nCols; col++) {
174                    expected = (rowSum[row] * colSum[col]) / total;
175                    sumSq += ((counts[row][col] - expected) *
176                            (counts[row][col] - expected)) / expected;
177                }
178            }
179            return sumSq;
180        }
181    
182        /**
183         * @param counts array representation of 2-way table
184         * @return p-value
185         * @throws IllegalArgumentException if preconditions are not met
186         * @throws MathException if an error occurs computing the p-value
187         */
188        public double chiSquareTest(long[][] counts)
189        throws IllegalArgumentException, MathException {
190            checkArray(counts);
191            double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
192            distribution.setDegreesOfFreedom(df);
193            return 1 - distribution.cumulativeProbability(chiSquare(counts));
194        }
195    
196        /**
197         * @param counts array representation of 2-way table
198         * @param alpha significance level of the test
199         * @return true iff null hypothesis can be rejected with confidence
200         * 1 - alpha
201         * @throws IllegalArgumentException if preconditions are not met
202         * @throws MathException if an error occurs performing the test
203         */
204        public boolean chiSquareTest(long[][] counts, double alpha)
205        throws IllegalArgumentException, MathException {
206            if ((alpha <= 0) || (alpha > 0.5)) {
207                throw MathRuntimeException.createIllegalArgumentException(
208                      LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
209                      alpha, 0.0, 0.5);
210            }
211            return chiSquareTest(counts) < alpha;
212        }
213    
214        /**
215         * @param observed1 array of observed frequency counts of the first data set
216         * @param observed2 array of observed frequency counts of the second data set
217         * @return chi-square test statistic
218         * @throws IllegalArgumentException if preconditions are not met
219         * @since 1.2
220         */
221        public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
222            throws IllegalArgumentException {
223    
224            // Make sure lengths are same
225            if (observed1.length < 2) {
226                throw MathRuntimeException.createIllegalArgumentException(
227                      LocalizedFormats.INSUFFICIENT_DIMENSION, observed1.length, 2);
228            }
229            if (observed1.length != observed2.length) {
230                throw MathRuntimeException.createIllegalArgumentException(
231                      LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
232                      observed1.length, observed2.length);
233            }
234    
235            // Ensure non-negative counts
236            checkNonNegative(observed1);
237            checkNonNegative(observed2);
238    
239            // Compute and compare count sums
240            long countSum1 = 0;
241            long countSum2 = 0;
242            boolean unequalCounts = false;
243            double weight = 0.0;
244            for (int i = 0; i < observed1.length; i++) {
245                countSum1 += observed1[i];
246                countSum2 += observed2[i];
247            }
248            // Ensure neither sample is uniformly 0
249            if (countSum1 == 0) {
250                throw MathRuntimeException.createIllegalArgumentException(
251                      LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO, 1);
252            }
253            if (countSum2 == 0) {
254                throw MathRuntimeException.createIllegalArgumentException(
255                      LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO, 2);
256            }
257            // Compare and compute weight only if different
258            unequalCounts = countSum1 != countSum2;
259            if (unequalCounts) {
260                weight = FastMath.sqrt((double) countSum1 / (double) countSum2);
261            }
262            // Compute ChiSquare statistic
263            double sumSq = 0.0d;
264            double dev = 0.0d;
265            double obs1 = 0.0d;
266            double obs2 = 0.0d;
267            for (int i = 0; i < observed1.length; i++) {
268                if (observed1[i] == 0 && observed2[i] == 0) {
269                    throw MathRuntimeException.createIllegalArgumentException(
270                          LocalizedFormats.OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY, i);
271                } else {
272                    obs1 = observed1[i];
273                    obs2 = observed2[i];
274                    if (unequalCounts) { // apply weights
275                        dev = obs1/weight - obs2 * weight;
276                    } else {
277                        dev = obs1 - obs2;
278                    }
279                    sumSq += (dev * dev) / (obs1 + obs2);
280                }
281            }
282            return sumSq;
283        }
284    
285        /**
286         * @param observed1 array of observed frequency counts of the first data set
287         * @param observed2 array of observed frequency counts of the second data set
288         * @return p-value
289         * @throws IllegalArgumentException if preconditions are not met
290         * @throws MathException if an error occurs computing the p-value
291         * @since 1.2
292         */
293        public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
294            throws IllegalArgumentException, MathException {
295            distribution.setDegreesOfFreedom((double) observed1.length - 1);
296            return 1 - distribution.cumulativeProbability(
297                    chiSquareDataSetsComparison(observed1, observed2));
298        }
299    
300        /**
301         * @param observed1 array of observed frequency counts of the first data set
302         * @param observed2 array of observed frequency counts of the second data set
303         * @param alpha significance level of the test
304         * @return true iff null hypothesis can be rejected with confidence
305         * 1 - alpha
306         * @throws IllegalArgumentException if preconditions are not met
307         * @throws MathException if an error occurs performing the test
308         * @since 1.2
309         */
310        public boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
311                double alpha) throws IllegalArgumentException, MathException {
312            if ((alpha <= 0) || (alpha > 0.5)) {
313                throw MathRuntimeException.createIllegalArgumentException(
314                      LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
315                      alpha, 0.0, 0.5);
316            }
317            return chiSquareTestDataSetsComparison(observed1, observed2) < alpha;
318        }
319    
320        /**
321         * Checks to make sure that the input long[][] array is rectangular,
322         * has at least 2 rows and 2 columns, and has all non-negative entries,
323         * throwing IllegalArgumentException if any of these checks fail.
324         *
325         * @param in input 2-way table to check
326         * @throws IllegalArgumentException if the array is not valid
327         */
328        private void checkArray(long[][] in) throws IllegalArgumentException {
329    
330            if (in.length < 2) {
331                throw MathRuntimeException.createIllegalArgumentException(
332                      LocalizedFormats.INSUFFICIENT_DIMENSION, in.length, 2);
333            }
334    
335            if (in[0].length < 2) {
336                throw MathRuntimeException.createIllegalArgumentException(
337                      LocalizedFormats.INSUFFICIENT_DIMENSION, in[0].length, 2);
338            }
339    
340            checkRectangular(in);
341            checkNonNegative(in);
342    
343        }
344    
345        //---------------------  Private array methods -- should find a utility home for these
346    
347        /**
348         * Throws IllegalArgumentException if the input array is not rectangular.
349         *
350         * @param in array to be tested
351         * @throws NullPointerException if input array is null
352         * @throws IllegalArgumentException if input array is not rectangular
353         */
354        private void checkRectangular(long[][] in) {
355            for (int i = 1; i < in.length; i++) {
356                if (in[i].length != in[0].length) {
357                    throw MathRuntimeException.createIllegalArgumentException(
358                          LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
359                          in[i].length, in[0].length);
360                }
361            }
362        }
363    
364        /**
365         * Check all entries of the input array are > 0.
366         *
367         * @param in array to be tested
368         * @exception IllegalArgumentException if one entry is not positive
369         */
370        private void checkPositive(double[] in) throws IllegalArgumentException {
371            for (int i = 0; i < in.length; i++) {
372                if (in[i] <= 0) {
373                    throw MathRuntimeException.createIllegalArgumentException(
374                          LocalizedFormats.NOT_POSITIVE_ELEMENT_AT_INDEX,
375                          i, in[i]);
376                }
377            }
378        }
379    
380        /**
381         * Check all entries of the input array are >= 0.
382         *
383         * @param in array to be tested
384         * @exception IllegalArgumentException if one entry is negative
385         */
386        private void checkNonNegative(long[] in) throws IllegalArgumentException {
387            for (int i = 0; i < in.length; i++) {
388                if (in[i] < 0) {
389                    throw MathRuntimeException.createIllegalArgumentException(
390                          LocalizedFormats.NEGATIVE_ELEMENT_AT_INDEX,
391                          i, in[i]);
392                }
393            }
394        }
395    
396        /**
397         * Check all entries of the input array are >= 0.
398         *
399         * @param in array to be tested
400         * @exception IllegalArgumentException if one entry is negative
401         */
402        private void checkNonNegative(long[][] in) throws IllegalArgumentException {
403            for (int i = 0; i < in.length; i ++) {
404                for (int j = 0; j < in[i].length; j++) {
405                    if (in[i][j] < 0) {
406                        throw MathRuntimeException.createIllegalArgumentException(
407                              LocalizedFormats.NEGATIVE_ELEMENT_AT_2D_INDEX,
408                              i, j, in[i][j]);
409                    }
410                }
411            }
412        }
413    
414        /**
415         * Modify the distribution used to compute inference statistics.
416         *
417         * @param value
418         *            the new distribution
419         * @since 1.2
420         */
421        public void setDistribution(ChiSquaredDistribution value) {
422            distribution = value;
423        }
424    }