001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.inference;
018    
019    import org.apache.commons.math.MathException;
020    import org.apache.commons.math.MathRuntimeException;
021    import org.apache.commons.math.distribution.TDistribution;
022    import org.apache.commons.math.distribution.TDistributionImpl;
023    import org.apache.commons.math.exception.util.LocalizedFormats;
024    import org.apache.commons.math.stat.StatUtils;
025    import org.apache.commons.math.stat.descriptive.StatisticalSummary;
026    import org.apache.commons.math.util.FastMath;
027    
028    /**
029     * Implements t-test statistics defined in the {@link TTest} interface.
030     * <p>
031     * Uses commons-math {@link org.apache.commons.math.distribution.TDistributionImpl}
032     * implementation to estimate exact p-values.</p>
033     *
034     * @version $Revision: 1042336 $ $Date: 2010-12-05 13:40:48 +0100 (dim. 05 déc. 2010) $
035     */
036    public class TTestImpl implements TTest  {
037    
038        /** Distribution used to compute inference statistics.
039         * @deprecated in 2.2 (to be removed in 3.0).
040         */
041        @Deprecated
042        private TDistribution distribution;
043    
044        /**
045         * Default constructor.
046         */
public TTestImpl() {
    // Delegate to the distribution-taking constructor with a freshly built
    // TDistributionImpl. The 1.0 argument is presumably its degrees of
    // freedom -- confirm against TDistributionImpl.
    this(new TDistributionImpl(1.0d));
}
050    
051        /**
052         * Create a test instance using the given distribution for computing
053         * inference statistics.
054         * @param t distribution used to compute inference statistics.
055         * @since 1.2
056         * @deprecated in 2.2 (to be removed in 3.0).
057         */
058        @Deprecated
public TTestImpl(TDistribution t) {
    // The superclass constructor runs implicitly; the only work here is to
    // hand the supplied distribution to the setter.
    // NOTE(review): setDistribution is not visible in this section; if it is
    // overridable, calling it from a constructor lets a subclass observe a
    // partially constructed instance -- confirm.
    setDistribution(t);
}
063    
064        /**
065         * Computes a paired, 2-sample t-statistic based on the data in the input
066         * arrays.  The t-statistic returned is equivalent to what would be returned by
067         * computing the one-sample t-statistic {@link #t(double, double[])}, with
068         * <code>mu = 0</code> and the sample array consisting of the (signed)
069         * differences between corresponding entries in <code>sample1</code> and
070         * <code>sample2.</code>
071         * <p>
072         * <strong>Preconditions</strong>: <ul>
073         * <li>The input arrays must have the same length and their common length
074         * must be at least 2.
075         * </li></ul></p>
076         *
077         * @param sample1 array of sample data values
078         * @param sample2 array of sample data values
079         * @return t statistic
080         * @throws IllegalArgumentException if the precondition is not met
081         * @throws MathException if the statistic cannot be computed due to a
082         *         convergence or other numerical error.
083         */
084        public double pairedT(double[] sample1, double[] sample2)
085            throws IllegalArgumentException, MathException {
086            checkSampleData(sample1);
087            checkSampleData(sample2);
088            double meanDifference = StatUtils.meanDifference(sample1, sample2);
089            return t(meanDifference, 0,
090                    StatUtils.varianceDifference(sample1, sample2, meanDifference),
091                    sample1.length);
092        }
093    
094         /**
095         * Returns the <i>observed significance level</i>, or
096         * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
097         * based on the data in the input arrays.
098         * <p>
099         * The number returned is the smallest significance level
100         * at which one can reject the null hypothesis that the mean of the paired
101         * differences is 0 in favor of the two-sided alternative that the mean paired
102         * difference is not equal to 0. For a one-sided test, divide the returned
103         * value by 2.</p>
104         * <p>
105         * This test is equivalent to a one-sample t-test computed using
106         * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
107         * array consisting of the signed differences between corresponding elements of
108         * <code>sample1</code> and <code>sample2.</code></p>
109         * <p>
110         * <strong>Usage Note:</strong><br>
111         * The validity of the p-value depends on the assumptions of the parametric
112         * t-test procedure, as discussed
113         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
114         * here</a></p>
115         * <p>
116         * <strong>Preconditions</strong>: <ul>
117         * <li>The input array lengths must be the same and their common length must
118         * be at least 2.
119         * </li></ul></p>
120         *
121         * @param sample1 array of sample data values
122         * @param sample2 array of sample data values
123         * @return p-value for t-test
124         * @throws IllegalArgumentException if the precondition is not met
125         * @throws MathException if an error occurs computing the p-value
126         */
127        public double pairedTTest(double[] sample1, double[] sample2)
128            throws IllegalArgumentException, MathException {
129            double meanDifference = StatUtils.meanDifference(sample1, sample2);
130            return tTest(meanDifference, 0,
131                    StatUtils.varianceDifference(sample1, sample2, meanDifference),
132                    sample1.length);
133        }
134    
135         /**
136         * Performs a paired t-test evaluating the null hypothesis that the
137         * mean of the paired differences between <code>sample1</code> and
138         * <code>sample2</code> is 0 in favor of the two-sided alternative that the
139         * mean paired difference is not equal to 0, with significance level
140         * <code>alpha</code>.
141         * <p>
142         * Returns <code>true</code> iff the null hypothesis can be rejected with
143         * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
144         * <code>alpha * 2</code></p>
145         * <p>
146         * <strong>Usage Note:</strong><br>
147         * The validity of the test depends on the assumptions of the parametric
148         * t-test procedure, as discussed
149         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
150         * here</a></p>
151         * <p>
152         * <strong>Preconditions</strong>: <ul>
153         * <li>The input array lengths must be the same and their common length
154         * must be at least 2.
155         * </li>
156         * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
157         * </li></ul></p>
158         *
159         * @param sample1 array of sample data values
160         * @param sample2 array of sample data values
161         * @param alpha significance level of the test
162         * @return true if the null hypothesis can be rejected with
163         * confidence 1 - alpha
164         * @throws IllegalArgumentException if the preconditions are not met
165         * @throws MathException if an error occurs performing the test
166         */
167        public boolean pairedTTest(double[] sample1, double[] sample2, double alpha)
168            throws IllegalArgumentException, MathException {
169            checkSignificanceLevel(alpha);
170            return pairedTTest(sample1, sample2) < alpha;
171        }
172    
173        /**
174         * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
175         * t statistic </a> given observed values and a comparison constant.
176         * <p>
177         * This statistic can be used to perform a one sample t-test for the mean.
178         * </p><p>
179         * <strong>Preconditions</strong>: <ul>
180         * <li>The observed array length must be at least 2.
181         * </li></ul></p>
182         *
183         * @param mu comparison constant
184         * @param observed array of values
185         * @return t statistic
186         * @throws IllegalArgumentException if input array length is less than 2
187         */
188        public double t(double mu, double[] observed)
189        throws IllegalArgumentException {
190            checkSampleData(observed);
191            return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
192                    observed.length);
193        }
194    
195        /**
196         * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
197         * t statistic </a> to use in comparing the mean of the dataset described by
198         * <code>sampleStats</code> to <code>mu</code>.
199         * <p>
200         * This statistic can be used to perform a one sample t-test for the mean.
201         * </p><p>
202         * <strong>Preconditions</strong>: <ul>
203         * <li><code>sampleStats.getN() &gt;= 2</code>.
204         * </li></ul></p>
205         *
206         * @param mu comparison constant
207         * @param sampleStats StatisticalSummary holding sample summary statistics
208         * @return t statistic
209         * @throws IllegalArgumentException if the precondition is not met
210         */
211        public double t(double mu, StatisticalSummary sampleStats)
212        throws IllegalArgumentException {
213            checkSampleData(sampleStats);
214            return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
215                    sampleStats.getN());
216        }
217    
218        /**
219         * Computes a 2-sample t statistic,  under the hypothesis of equal
220         * subpopulation variances.  To compute a t-statistic without the
221         * equal variances hypothesis, use {@link #t(double[], double[])}.
222         * <p>
223         * This statistic can be used to perform a (homoscedastic) two-sample
224         * t-test to compare sample means.</p>
225         * <p>
226         * The t-statistic is</p>
227         * <p>
228         * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
229         * </p><p>
230         * where <strong><code>n1</code></strong> is the size of first sample;
231         * <strong><code> n2</code></strong> is the size of second sample;
232         * <strong><code> m1</code></strong> is the mean of first sample;
233         * <strong><code> m2</code></strong> is the mean of second sample
235         * and <strong><code>var</code></strong> is the pooled variance estimate:
236         * </p><p>
237         * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
238         * </p><p>
239         * with <strong><code>var1</code></strong> the variance of the first sample and
240         * <strong><code>var2</code></strong> the variance of the second sample.
241         * </p><p>
242         * <strong>Preconditions</strong>: <ul>
243         * <li>The observed array lengths must both be at least 2.
244         * </li></ul></p>
245         *
246         * @param sample1 array of sample data values
247         * @param sample2 array of sample data values
248         * @return t statistic
249         * @throws IllegalArgumentException if the precondition is not met
250         */
251        public double homoscedasticT(double[] sample1, double[] sample2)
252        throws IllegalArgumentException {
253            checkSampleData(sample1);
254            checkSampleData(sample2);
255            return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
256                    StatUtils.variance(sample1), StatUtils.variance(sample2),
257                    sample1.length, sample2.length);
258        }
259    
260        /**
261         * Computes a 2-sample t statistic, without the hypothesis of equal
262         * subpopulation variances.  To compute a t-statistic assuming equal
263         * variances, use {@link #homoscedasticT(double[], double[])}.
264         * <p>
265         * This statistic can be used to perform a two-sample t-test to compare
266         * sample means.</p>
267         * <p>
268         * The t-statistic is</p>
269         * <p>
270         * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
271         * </p><p>
272         *  where <strong><code>n1</code></strong> is the size of the first sample
273         * <strong><code> n2</code></strong> is the size of the second sample;
274         * <strong><code> m1</code></strong> is the mean of the first sample;
275         * <strong><code> m2</code></strong> is the mean of the second sample;
276         * <strong><code> var1</code></strong> is the variance of the first sample;
277         * <strong><code> var2</code></strong> is the variance of the second sample;
278         * </p><p>
279         * <strong>Preconditions</strong>: <ul>
280         * <li>The observed array lengths must both be at least 2.
281         * </li></ul></p>
282         *
283         * @param sample1 array of sample data values
284         * @param sample2 array of sample data values
285         * @return t statistic
286         * @throws IllegalArgumentException if the precondition is not met
287         */
288        public double t(double[] sample1, double[] sample2)
289        throws IllegalArgumentException {
290            checkSampleData(sample1);
291            checkSampleData(sample2);
292            return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
293                    StatUtils.variance(sample1), StatUtils.variance(sample2),
294                    sample1.length, sample2.length);
295        }
296    
297        /**
298         * Computes a 2-sample t statistic, comparing the means of the datasets
299         * described by two {@link StatisticalSummary} instances, without the
300         * assumption of equal subpopulation variances.  Use
301         * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
302         * compute a t-statistic under the equal variances assumption.
303         * <p>
304         * This statistic can be used to perform a two-sample t-test to compare
305         * sample means.</p>
306         * <p>
307         * The returned t-statistic is</p>
308         * <p>
309         * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
310         * </p><p>
311         * where <strong><code>n1</code></strong> is the size of the first sample;
312         * <strong><code> n2</code></strong> is the size of the second sample;
313         * <strong><code> m1</code></strong> is the mean of the first sample;
314         * <strong><code> m2</code></strong> is the mean of the second sample
315         * <strong><code> var1</code></strong> is the variance of the first sample;
316         * <strong><code> var2</code></strong> is the variance of the second sample
317         * </p><p>
318         * <strong>Preconditions</strong>: <ul>
319         * <li>The datasets described by the two Univariates must each contain
320         * at least 2 observations.
321         * </li></ul></p>
322         *
323         * @param sampleStats1 StatisticalSummary describing data from the first sample
324         * @param sampleStats2 StatisticalSummary describing data from the second sample
325         * @return t statistic
326         * @throws IllegalArgumentException if the precondition is not met
327         */
328        public double t(StatisticalSummary sampleStats1,
329                        StatisticalSummary sampleStats2)
330        throws IllegalArgumentException {
331            checkSampleData(sampleStats1);
332            checkSampleData(sampleStats2);
333            return t(sampleStats1.getMean(), sampleStats2.getMean(),
334                    sampleStats1.getVariance(), sampleStats2.getVariance(),
335                    sampleStats1.getN(), sampleStats2.getN());
336        }
337    
338        /**
339         * Computes a 2-sample t statistic, comparing the means of the datasets
340         * described by two {@link StatisticalSummary} instances, under the
341         * assumption of equal subpopulation variances.  To compute a t-statistic
342         * without the equal variances assumption, use
343         * {@link #t(StatisticalSummary, StatisticalSummary)}.
344         * <p>
345         * This statistic can be used to perform a (homoscedastic) two-sample
346         * t-test to compare sample means.</p>
347         * <p>
348         * The t-statistic returned is</p>
349         * <p>
350         * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
351         * </p><p>
352         * where <strong><code>n1</code></strong> is the size of first sample;
353         * <strong><code> n2</code></strong> is the size of second sample;
354         * <strong><code> m1</code></strong> is the mean of first sample;
355         * <strong><code> m2</code></strong> is the mean of second sample
356         * and <strong><code>var</code></strong> is the pooled variance estimate:
357         * </p><p>
358         * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
359         * </p><p>
360         * with <strong><code>var1</code></strong> the variance of the first sample and
361         * <strong><code>var2</code></strong> the variance of the second sample.
362         * </p><p>
363         * <strong>Preconditions</strong>: <ul>
364         * <li>The datasets described by the two Univariates must each contain
365         * at least 2 observations.
366         * </li></ul></p>
367         *
368         * @param sampleStats1 StatisticalSummary describing data from the first sample
369         * @param sampleStats2 StatisticalSummary describing data from the second sample
370         * @return t statistic
371         * @throws IllegalArgumentException if the precondition is not met
372         */
373        public double homoscedasticT(StatisticalSummary sampleStats1,
374                StatisticalSummary sampleStats2)
375        throws IllegalArgumentException {
376            checkSampleData(sampleStats1);
377            checkSampleData(sampleStats2);
378            return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
379                    sampleStats1.getVariance(), sampleStats2.getVariance(),
380                    sampleStats1.getN(), sampleStats2.getN());
381        }
382    
383         /**
384         * Returns the <i>observed significance level</i>, or
385         * <i>p-value</i>, associated with a one-sample, two-tailed t-test
386         * comparing the mean of the input array with the constant <code>mu</code>.
387         * <p>
388         * The number returned is the smallest significance level
389         * at which one can reject the null hypothesis that the mean equals
390         * <code>mu</code> in favor of the two-sided alternative that the mean
391         * is different from <code>mu</code>. For a one-sided test, divide the
392         * returned value by 2.</p>
393         * <p>
394         * <strong>Usage Note:</strong><br>
395         * The validity of the test depends on the assumptions of the parametric
396         * t-test procedure, as discussed
397         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
398         * </p><p>
399         * <strong>Preconditions</strong>: <ul>
400         * <li>The observed array length must be at least 2.
401         * </li></ul></p>
402         *
403         * @param mu constant value to compare sample mean against
404         * @param sample array of sample data values
405         * @return p-value
406         * @throws IllegalArgumentException if the precondition is not met
407         * @throws MathException if an error occurs computing the p-value
408         */
409        public double tTest(double mu, double[] sample)
410        throws IllegalArgumentException, MathException {
411            checkSampleData(sample);
412            return tTest( StatUtils.mean(sample), mu, StatUtils.variance(sample),
413                    sample.length);
414        }
415    
416        /**
417         * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
418         * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
419         * which <code>sample</code> is drawn equals <code>mu</code>.
420         * <p>
421         * Returns <code>true</code> iff the null hypothesis can be
422         * rejected with confidence <code>1 - alpha</code>.  To
423         * perform a 1-sided test, use <code>alpha * 2</code>
424         * </p><p>
425         * <strong>Examples:</strong><br><ol>
426         * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
427         * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
428         * </li>
429         * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
430         * at the 99% level, first verify that the measured sample mean is less
431         * than <code>mu</code> and then use
432         * <br><code>tTest(mu, sample, 0.02) </code>
433         * </li></ol></p>
434         * <p>
435         * <strong>Usage Note:</strong><br>
436         * The validity of the test depends on the assumptions of the one-sample
437         * parametric t-test procedure, as discussed
438         * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
439         * </p><p>
440         * <strong>Preconditions</strong>: <ul>
441         * <li>The observed array length must be at least 2.
442         * </li></ul></p>
443         *
444         * @param mu constant value to compare sample mean against
445         * @param sample array of sample data values
446         * @param alpha significance level of the test
447         * @return true if the null hypothesis can be rejected with confidence 1 - alpha
448         * @throws IllegalArgumentException if the precondition is not met
449         * @throws MathException if an error occurs computing the p-value
450         */
451        public boolean tTest(double mu, double[] sample, double alpha)
452        throws IllegalArgumentException, MathException {
453            checkSignificanceLevel(alpha);
454            return tTest(mu, sample) < alpha;
455        }
456    
457        /**
458         * Returns the <i>observed significance level</i>, or
459         * <i>p-value</i>, associated with a one-sample, two-tailed t-test
460         * comparing the mean of the dataset described by <code>sampleStats</code>
461         * with the constant <code>mu</code>.
462         * <p>
463         * The number returned is the smallest significance level
464         * at which one can reject the null hypothesis that the mean equals
465         * <code>mu</code> in favor of the two-sided alternative that the mean
466         * is different from <code>mu</code>. For a one-sided test, divide the
467         * returned value by 2.</p>
468         * <p>
469         * <strong>Usage Note:</strong><br>
470         * The validity of the test depends on the assumptions of the parametric
471         * t-test procedure, as discussed
472         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
473         * here</a></p>
474         * <p>
475         * <strong>Preconditions</strong>: <ul>
476         * <li>The sample must contain at least 2 observations.
477         * </li></ul></p>
478         *
479         * @param mu constant value to compare sample mean against
480         * @param sampleStats StatisticalSummary describing sample data
481         * @return p-value
482         * @throws IllegalArgumentException if the precondition is not met
483         * @throws MathException if an error occurs computing the p-value
484         */
485        public double tTest(double mu, StatisticalSummary sampleStats)
486        throws IllegalArgumentException, MathException {
487            checkSampleData(sampleStats);
488            return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
489                    sampleStats.getN());
490        }
491    
492         /**
493         * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
494         * two-sided t-test</a> evaluating the null hypothesis that the mean of the
495         * population from which the dataset described by <code>stats</code> is
496         * drawn equals <code>mu</code>.
497         * <p>
498         * Returns <code>true</code> iff the null hypothesis can be rejected with
499         * confidence <code>1 - alpha</code>.  To  perform a 1-sided test, use
500         * <code>alpha * 2.</code></p>
501         * <p>
502         * <strong>Examples:</strong><br><ol>
503         * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
504         * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
505         * </li>
506         * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
507         * at the 99% level, first verify that the measured sample mean is less
508         * than <code>mu</code> and then use
509         * <br><code>tTest(mu, sampleStats, 0.02) </code>
510         * </li></ol></p>
511         * <p>
512         * <strong>Usage Note:</strong><br>
513         * The validity of the test depends on the assumptions of the one-sample
514         * parametric t-test procedure, as discussed
515         * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
516         * </p><p>
517         * <strong>Preconditions</strong>: <ul>
518         * <li>The sample must include at least 2 observations.
519         * </li></ul></p>
520         *
521         * @param mu constant value to compare sample mean against
522         * @param sampleStats StatisticalSummary describing sample data values
523         * @param alpha significance level of the test
524         * @return true if the null hypothesis can be rejected with confidence 1 - alpha
525         * @throws IllegalArgumentException if the precondition is not met
526         * @throws MathException if an error occurs computing the p-value
527         */
528        public boolean tTest( double mu, StatisticalSummary sampleStats,
529                double alpha)
530        throws IllegalArgumentException, MathException {
531            checkSignificanceLevel(alpha);
532            return tTest(mu, sampleStats) < alpha;
533        }
534    
535        /**
536         * Returns the <i>observed significance level</i>, or
537         * <i>p-value</i>, associated with a two-sample, two-tailed t-test
538         * comparing the means of the input arrays.
539         * <p>
540         * The number returned is the smallest significance level
541         * at which one can reject the null hypothesis that the two means are
542         * equal in favor of the two-sided alternative that they are different.
543         * For a one-sided test, divide the returned value by 2.</p>
544         * <p>
545         * The test does not assume that the underlying population variances are
546         * equal  and it uses approximated degrees of freedom computed from the
547         * sample data to compute the p-value.  The t-statistic used is as defined in
548         * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
549         * to the degrees of freedom is used,
550         * as described
551         * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
552         * here.</a>  To perform the test under the assumption of equal subpopulation
553         * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
554         * <p>
555         * <strong>Usage Note:</strong><br>
556         * The validity of the p-value depends on the assumptions of the parametric
557         * t-test procedure, as discussed
558         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
559         * here</a></p>
560         * <p>
561         * <strong>Preconditions</strong>: <ul>
562         * <li>The observed array lengths must both be at least 2.
563         * </li></ul></p>
564         *
565         * @param sample1 array of sample data values
566         * @param sample2 array of sample data values
567         * @return p-value for t-test
568         * @throws IllegalArgumentException if the precondition is not met
569         * @throws MathException if an error occurs computing the p-value
570         */
571        public double tTest(double[] sample1, double[] sample2)
572        throws IllegalArgumentException, MathException {
573            checkSampleData(sample1);
574            checkSampleData(sample2);
575            return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
576                    StatUtils.variance(sample1), StatUtils.variance(sample2),
577                    sample1.length, sample2.length);
578        }
579    
580        /**
581         * Returns the <i>observed significance level</i>, or
582         * <i>p-value</i>, associated with a two-sample, two-tailed t-test
583         * comparing the means of the input arrays, under the assumption that
584         * the two samples are drawn from subpopulations with equal variances.
585         * To perform the test without the equal variances assumption, use
586         * {@link #tTest(double[], double[])}.
587         * <p>
588         * The number returned is the smallest significance level
589         * at which one can reject the null hypothesis that the two means are
590         * equal in favor of the two-sided alternative that they are different.
591         * For a one-sided test, divide the returned value by 2.</p>
592         * <p>
593         * A pooled variance estimate is used to compute the t-statistic.  See
594         * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
595         * minus 2 is used as the degrees of freedom.</p>
596         * <p>
597         * <strong>Usage Note:</strong><br>
598         * The validity of the p-value depends on the assumptions of the parametric
599         * t-test procedure, as discussed
600         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
601         * here</a></p>
602         * <p>
603         * <strong>Preconditions</strong>: <ul>
604         * <li>The observed array lengths must both be at least 2.
605         * </li></ul></p>
606         *
607         * @param sample1 array of sample data values
608         * @param sample2 array of sample data values
609         * @return p-value for t-test
610         * @throws IllegalArgumentException if the precondition is not met
611         * @throws MathException if an error occurs computing the p-value
612         */
613        public double homoscedasticTTest(double[] sample1, double[] sample2)
614        throws IllegalArgumentException, MathException {
615            checkSampleData(sample1);
616            checkSampleData(sample2);
617            return homoscedasticTTest(StatUtils.mean(sample1),
618                    StatUtils.mean(sample2), StatUtils.variance(sample1),
619                    StatUtils.variance(sample2), sample1.length,
620                    sample2.length);
621        }
622    
623    
624         /**
625         * Performs a
626         * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
627         * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
628         * and <code>sample2</code> are drawn from populations with the same mean,
629         * with significance level <code>alpha</code>.  This test does not assume
630         * that the subpopulation variances are equal.  To perform the test assuming
631         * equal variances, use
632         * {@link #homoscedasticTTest(double[], double[], double)}.
633         * <p>
634         * Returns <code>true</code> iff the null hypothesis that the means are
635         * equal can be rejected with confidence <code>1 - alpha</code>.  To
     * perform a 1-sided test, use <code>alpha * 2</code></p>
637         * <p>
638         * See {@link #t(double[], double[])} for the formula used to compute the
639         * t-statistic.  Degrees of freedom are approximated using the
640         * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
641         * Welch-Satterthwaite approximation.</a></p>
642    
643         * <p>
644         * <strong>Examples:</strong><br><ol>
645         * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
646         * the 95% level,  use
647         * <br><code>tTest(sample1, sample2, 0.05). </code>
648         * </li>
649         * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code> at
650         * the 99% level, first verify that the measured  mean of <code>sample 1</code>
651         * is less than the mean of <code>sample 2</code> and then use
652         * <br><code>tTest(sample1, sample2, 0.02) </code>
653         * </li></ol></p>
654         * <p>
655         * <strong>Usage Note:</strong><br>
656         * The validity of the test depends on the assumptions of the parametric
657         * t-test procedure, as discussed
658         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
659         * here</a></p>
660         * <p>
661         * <strong>Preconditions</strong>: <ul>
662         * <li>The observed array lengths must both be at least 2.
663         * </li>
     * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
665         * </li></ul></p>
666         *
667         * @param sample1 array of sample data values
668         * @param sample2 array of sample data values
669         * @param alpha significance level of the test
670         * @return true if the null hypothesis can be rejected with
671         * confidence 1 - alpha
672         * @throws IllegalArgumentException if the preconditions are not met
673         * @throws MathException if an error occurs performing the test
674         */
675        public boolean tTest(double[] sample1, double[] sample2,
676                double alpha)
677        throws IllegalArgumentException, MathException {
678            checkSignificanceLevel(alpha);
679            return tTest(sample1, sample2) < alpha;
680        }
681    
682        /**
683         * Performs a
684         * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
685         * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
686         * and <code>sample2</code> are drawn from populations with the same mean,
687         * with significance level <code>alpha</code>,  assuming that the
688         * subpopulation variances are equal.  Use
689         * {@link #tTest(double[], double[], double)} to perform the test without
690         * the assumption of equal variances.
691         * <p>
692         * Returns <code>true</code> iff the null hypothesis that the means are
693         * equal can be rejected with confidence <code>1 - alpha</code>.  To
694         * perform a 1-sided test, use <code>alpha * 2.</code>  To perform the test
695         * without the assumption of equal subpopulation variances, use
696         * {@link #tTest(double[], double[], double)}.</p>
697         * <p>
698         * A pooled variance estimate is used to compute the t-statistic. See
     * {@link #homoscedasticT(double[], double[])} for the formula. The sum of the sample
700         * sizes minus 2 is used as the degrees of freedom.</p>
701         * <p>
702         * <strong>Examples:</strong><br><ol>
703         * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
     * the 95% level, use <br><code>homoscedasticTTest(sample1, sample2, 0.05). </code>
705         * </li>
706         * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
707         * at the 99% level, first verify that the measured mean of
708         * <code>sample 1</code> is less than the mean of <code>sample 2</code>
709         * and then use
     * <br><code>homoscedasticTTest(sample1, sample2, 0.02) </code>
711         * </li></ol></p>
712         * <p>
713         * <strong>Usage Note:</strong><br>
714         * The validity of the test depends on the assumptions of the parametric
715         * t-test procedure, as discussed
716         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
717         * here</a></p>
718         * <p>
719         * <strong>Preconditions</strong>: <ul>
720         * <li>The observed array lengths must both be at least 2.
721         * </li>
     * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
723         * </li></ul></p>
724         *
725         * @param sample1 array of sample data values
726         * @param sample2 array of sample data values
727         * @param alpha significance level of the test
728         * @return true if the null hypothesis can be rejected with
729         * confidence 1 - alpha
730         * @throws IllegalArgumentException if the preconditions are not met
731         * @throws MathException if an error occurs performing the test
732         */
733        public boolean homoscedasticTTest(double[] sample1, double[] sample2,
734                double alpha)
735        throws IllegalArgumentException, MathException {
736            checkSignificanceLevel(alpha);
737            return homoscedasticTTest(sample1, sample2) < alpha;
738        }
739    
740         /**
741         * Returns the <i>observed significance level</i>, or
742         * <i>p-value</i>, associated with a two-sample, two-tailed t-test
743         * comparing the means of the datasets described by two StatisticalSummary
744         * instances.
745         * <p>
746         * The number returned is the smallest significance level
747         * at which one can reject the null hypothesis that the two means are
748         * equal in favor of the two-sided alternative that they are different.
749         * For a one-sided test, divide the returned value by 2.</p>
750         * <p>
     * The test does not assume that the underlying population variances are
752         * equal  and it uses approximated degrees of freedom computed from the
753         * sample data to compute the p-value.   To perform the test assuming
754         * equal variances, use
755         * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
756         * <p>
757         * <strong>Usage Note:</strong><br>
758         * The validity of the p-value depends on the assumptions of the parametric
759         * t-test procedure, as discussed
760         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
761         * here</a></p>
762         * <p>
763         * <strong>Preconditions</strong>: <ul>
764         * <li>The datasets described by the two Univariates must each contain
765         * at least 2 observations.
766         * </li></ul></p>
767         *
768         * @param sampleStats1  StatisticalSummary describing data from the first sample
769         * @param sampleStats2  StatisticalSummary describing data from the second sample
770         * @return p-value for t-test
771         * @throws IllegalArgumentException if the precondition is not met
772         * @throws MathException if an error occurs computing the p-value
773         */
774        public double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
775        throws IllegalArgumentException, MathException {
776            checkSampleData(sampleStats1);
777            checkSampleData(sampleStats2);
778            return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
779                    sampleStats2.getVariance(), sampleStats1.getN(),
780                    sampleStats2.getN());
781        }
782    
783        /**
784         * Returns the <i>observed significance level</i>, or
785         * <i>p-value</i>, associated with a two-sample, two-tailed t-test
786         * comparing the means of the datasets described by two StatisticalSummary
787         * instances, under the hypothesis of equal subpopulation variances. To
788         * perform a test without the equal variances assumption, use
789         * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
790         * <p>
791         * The number returned is the smallest significance level
792         * at which one can reject the null hypothesis that the two means are
793         * equal in favor of the two-sided alternative that they are different.
794         * For a one-sided test, divide the returned value by 2.</p>
795         * <p>
796         * See {@link #homoscedasticT(double[], double[])} for the formula used to
797         * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
798         * the degrees of freedom.</p>
799         * <p>
800         * <strong>Usage Note:</strong><br>
801         * The validity of the p-value depends on the assumptions of the parametric
802         * t-test procedure, as discussed
803         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
804         * </p><p>
805         * <strong>Preconditions</strong>: <ul>
806         * <li>The datasets described by the two Univariates must each contain
807         * at least 2 observations.
808         * </li></ul></p>
809         *
810         * @param sampleStats1  StatisticalSummary describing data from the first sample
811         * @param sampleStats2  StatisticalSummary describing data from the second sample
812         * @return p-value for t-test
813         * @throws IllegalArgumentException if the precondition is not met
814         * @throws MathException if an error occurs computing the p-value
815         */
816        public double homoscedasticTTest(StatisticalSummary sampleStats1,
817                                         StatisticalSummary sampleStats2)
818        throws IllegalArgumentException, MathException {
819            checkSampleData(sampleStats1);
820            checkSampleData(sampleStats2);
821            return homoscedasticTTest(sampleStats1.getMean(),
822                    sampleStats2.getMean(), sampleStats1.getVariance(),
823                    sampleStats2.getVariance(), sampleStats1.getN(),
824                    sampleStats2.getN());
825        }
826    
827        /**
828         * Performs a
829         * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
830         * two-sided t-test</a> evaluating the null hypothesis that
831         * <code>sampleStats1</code> and <code>sampleStats2</code> describe
832         * datasets drawn from populations with the same mean, with significance
833         * level <code>alpha</code>.   This test does not assume that the
834         * subpopulation variances are equal.  To perform the test under the equal
835         * variances assumption, use
836         * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
837         * <p>
838         * Returns <code>true</code> iff the null hypothesis that the means are
839         * equal can be rejected with confidence <code>1 - alpha</code>.  To
840         * perform a 1-sided test, use <code>alpha * 2</code></p>
841         * <p>
842         * See {@link #t(double[], double[])} for the formula used to compute the
843         * t-statistic.  Degrees of freedom are approximated using the
844         * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
845         * Welch-Satterthwaite approximation.</a></p>
846         * <p>
847         * <strong>Examples:</strong><br><ol>
848         * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
     * the 95% level, use
850         * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
851         * </li>
852         * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
853         * at the 99% level,  first verify that the measured mean of
854         * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
855         * and then use
856         * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
857         * </li></ol></p>
858         * <p>
859         * <strong>Usage Note:</strong><br>
860         * The validity of the test depends on the assumptions of the parametric
861         * t-test procedure, as discussed
862         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
863         * here</a></p>
864         * <p>
865         * <strong>Preconditions</strong>: <ul>
866         * <li>The datasets described by the two Univariates must each contain
867         * at least 2 observations.
868         * </li>
     * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
870         * </li></ul></p>
871         *
872         * @param sampleStats1 StatisticalSummary describing sample data values
873         * @param sampleStats2 StatisticalSummary describing sample data values
874         * @param alpha significance level of the test
875         * @return true if the null hypothesis can be rejected with
876         * confidence 1 - alpha
877         * @throws IllegalArgumentException if the preconditions are not met
878         * @throws MathException if an error occurs performing the test
879         */
880        public boolean tTest(StatisticalSummary sampleStats1,
881                StatisticalSummary sampleStats2, double alpha)
882        throws IllegalArgumentException, MathException {
883            checkSignificanceLevel(alpha);
884            return tTest(sampleStats1, sampleStats2) < alpha;
885        }
886    
887        //----------------------------------------------- Protected methods
888    
889        /**
890         * Computes approximate degrees of freedom for 2-sample t-test.
891         *
892         * @param v1 first sample variance
893         * @param v2 second sample variance
894         * @param n1 first sample n
895         * @param n2 second sample n
896         * @return approximate degrees of freedom
897         */
898        protected double df(double v1, double v2, double n1, double n2) {
899            return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
900            ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
901                    (n2 * n2 * (n2 - 1d)));
902        }
903    
904        /**
905         * Computes t test statistic for 1-sample t-test.
906         *
907         * @param m sample mean
908         * @param mu constant to test against
909         * @param v sample variance
910         * @param n sample n
911         * @return t test statistic
912         */
913        protected double t(double m, double mu, double v, double n) {
914            return (m - mu) / FastMath.sqrt(v / n);
915        }
916    
917        /**
918         * Computes t test statistic for 2-sample t-test.
919         * <p>
920         * Does not assume that subpopulation variances are equal.</p>
921         *
922         * @param m1 first sample mean
923         * @param m2 second sample mean
924         * @param v1 first sample variance
925         * @param v2 second sample variance
926         * @param n1 first sample n
927         * @param n2 second sample n
928         * @return t test statistic
929         */
930        protected double t(double m1, double m2,  double v1, double v2, double n1,
931                double n2)  {
932                return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2));
933        }
934    
935        /**
936         * Computes t test statistic for 2-sample t-test under the hypothesis
937         * of equal subpopulation variances.
938         *
939         * @param m1 first sample mean
940         * @param m2 second sample mean
941         * @param v1 first sample variance
942         * @param v2 second sample variance
943         * @param n1 first sample n
944         * @param n2 second sample n
945         * @return t test statistic
946         */
947        protected double homoscedasticT(double m1, double m2,  double v1,
948                double v2, double n1, double n2)  {
949                double pooledVariance = ((n1  - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
950                return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
951        }
952    
953        /**
954         * Computes p-value for 2-sided, 1-sample t-test.
955         *
956         * @param m sample mean
957         * @param mu constant to test against
958         * @param v sample variance
959         * @param n sample n
960         * @return p-value
961         * @throws MathException if an error occurs computing the p-value
962         */
963        protected double tTest(double m, double mu, double v, double n)
964        throws MathException {
965            double t = FastMath.abs(t(m, mu, v, n));
966            distribution.setDegreesOfFreedom(n - 1);
967            return 2.0 * distribution.cumulativeProbability(-t);
968        }
969    
970        /**
971         * Computes p-value for 2-sided, 2-sample t-test.
972         * <p>
973         * Does not assume subpopulation variances are equal. Degrees of freedom
974         * are estimated from the data.</p>
975         *
976         * @param m1 first sample mean
977         * @param m2 second sample mean
978         * @param v1 first sample variance
979         * @param v2 second sample variance
980         * @param n1 first sample n
981         * @param n2 second sample n
982         * @return p-value
983         * @throws MathException if an error occurs computing the p-value
984         */
985        protected double tTest(double m1, double m2, double v1, double v2,
986                double n1, double n2)
987        throws MathException {
988            double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2));
989            double degreesOfFreedom = 0;
990            degreesOfFreedom = df(v1, v2, n1, n2);
991            distribution.setDegreesOfFreedom(degreesOfFreedom);
992            return 2.0 * distribution.cumulativeProbability(-t);
993        }
994    
995        /**
996         * Computes p-value for 2-sided, 2-sample t-test, under the assumption
997         * of equal subpopulation variances.
998         * <p>
999         * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
1000         *
1001         * @param m1 first sample mean
1002         * @param m2 second sample mean
1003         * @param v1 first sample variance
1004         * @param v2 second sample variance
1005         * @param n1 first sample n
1006         * @param n2 second sample n
1007         * @return p-value
1008         * @throws MathException if an error occurs computing the p-value
1009         */
1010        protected double homoscedasticTTest(double m1, double m2, double v1,
1011                double v2, double n1, double n2)
1012        throws MathException {
1013            double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1014            double degreesOfFreedom = n1 + n2 - 2;
1015            distribution.setDegreesOfFreedom(degreesOfFreedom);
1016            return 2.0 * distribution.cumulativeProbability(-t);
1017        }
1018    
1019        /**
1020         * Modify the distribution used to compute inference statistics.
1021         * @param value the new distribution
1022         * @since 1.2
1023         * @deprecated in 2.2 (to be removed in 3.0).
1024         */
1025        @Deprecated
1026        public void setDistribution(TDistribution value) {
1027            distribution = value;
1028        }
1029    
1030        /** Check significance level.
1031         * @param alpha significance level
1032         * @exception IllegalArgumentException if significance level is out of bounds
1033         */
1034        private void checkSignificanceLevel(final double alpha)
1035            throws IllegalArgumentException {
1036            if ((alpha <= 0) || (alpha > 0.5)) {
1037                throw MathRuntimeException.createIllegalArgumentException(
1038                      LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
1039                      alpha, 0.0, 0.5);
1040            }
1041        }
1042    
1043        /** Check sample data.
1044         * @param data sample data
1045         * @exception IllegalArgumentException if there is not enough sample data
1046         */
1047        private void checkSampleData(final double[] data)
1048            throws IllegalArgumentException {
1049            if ((data == null) || (data.length < 2)) {
1050                throw MathRuntimeException.createIllegalArgumentException(
1051                      LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1052                      (data == null) ? 0 : data.length);
1053            }
1054        }
1055    
1056        /** Check sample data.
1057         * @param stat statistical summary
1058         * @exception IllegalArgumentException if there is not enough sample data
1059         */
1060        private void checkSampleData(final StatisticalSummary stat)
1061            throws IllegalArgumentException {
1062            if ((stat == null) || (stat.getN() < 2)) {
1063                throw MathRuntimeException.createIllegalArgumentException(
1064                      LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1065                      (stat == null) ? 0 : stat.getN());
1066            }
1067        }
1068    
1069    }