001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math3.stat.inference;
018    
019    import org.apache.commons.math3.distribution.TDistribution;
020    import org.apache.commons.math3.exception.DimensionMismatchException;
021    import org.apache.commons.math3.exception.MathIllegalArgumentException;
022    import org.apache.commons.math3.exception.MaxCountExceededException;
023    import org.apache.commons.math3.exception.NoDataException;
024    import org.apache.commons.math3.exception.NotStrictlyPositiveException;
025    import org.apache.commons.math3.exception.NullArgumentException;
026    import org.apache.commons.math3.exception.NumberIsTooSmallException;
027    import org.apache.commons.math3.exception.OutOfRangeException;
028    import org.apache.commons.math3.exception.util.LocalizedFormats;
029    import org.apache.commons.math3.stat.StatUtils;
030    import org.apache.commons.math3.stat.descriptive.StatisticalSummary;
031    import org.apache.commons.math3.util.FastMath;
032    
033    /**
034     * An implementation for Student's t-tests.
035     * <p>
036     * Tests can be:<ul>
037     * <li>One-sample or two-sample</li>
038     * <li>One-sided or two-sided</li>
039     * <li>Paired or unpaired (for two-sample tests)</li>
040     * <li>Homoscedastic (equal variance assumption) or heteroscedastic
041     * (for two sample tests)</li>
042     * <li>Fixed significance level (boolean-valued) or returning p-values.
043     * </li></ul></p>
044     * <p>
045     * Test statistics are available for all tests.  Methods including "Test"
046     * in their names perform tests; all other methods return t-statistics.  Among
047     * the "Test" methods, <code>double-</code>valued methods return p-values;
048     * <code>boolean-</code>valued methods perform fixed significance level tests.
049     * Significance levels are always specified as numbers between 0 and 0.5
050     * (e.g. tests at the 95% level  use <code>alpha=0.05</code>).</p>
051     * <p>
052     * Input to tests can be either <code>double[]</code> arrays or
053     * {@link StatisticalSummary} instances.</p><p>
054     * Uses commons-math {@link org.apache.commons.math3.distribution.TDistribution}
055     * implementation to estimate exact p-values.</p>
056     *
057     * @version $Id: TTest.java 1416643 2012-12-03 19:37:14Z tn $
058     */
059    public class TTest {
060        /**
061         * Computes a paired, 2-sample t-statistic based on the data in the input
062         * arrays.  The t-statistic returned is equivalent to what would be returned by
063         * computing the one-sample t-statistic {@link #t(double, double[])}, with
064         * <code>mu = 0</code> and the sample array consisting of the (signed)
065         * differences between corresponding entries in <code>sample1</code> and
066         * <code>sample2.</code>
067         * <p>
068         * <strong>Preconditions</strong>: <ul>
069         * <li>The input arrays must have the same length and their common length
070         * must be at least 2.
071         * </li></ul></p>
072         *
073         * @param sample1 array of sample data values
074         * @param sample2 array of sample data values
075         * @return t statistic
076         * @throws NullArgumentException if the arrays are <code>null</code>
077         * @throws NoDataException if the arrays are empty
078         * @throws DimensionMismatchException if the length of the arrays is not equal
079         * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
080         */
081        public double pairedT(final double[] sample1, final double[] sample2)
082            throws NullArgumentException, NoDataException,
083            DimensionMismatchException, NumberIsTooSmallException {
084    
085            checkSampleData(sample1);
086            checkSampleData(sample2);
087            double meanDifference = StatUtils.meanDifference(sample1, sample2);
088            return t(meanDifference, 0,
089                     StatUtils.varianceDifference(sample1, sample2, meanDifference),
090                     sample1.length);
091    
092        }
093    
094        /**
095         * Returns the <i>observed significance level</i>, or
096         * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
097         * based on the data in the input arrays.
098         * <p>
099         * The number returned is the smallest significance level
100         * at which one can reject the null hypothesis that the mean of the paired
101         * differences is 0 in favor of the two-sided alternative that the mean paired
102         * difference is not equal to 0. For a one-sided test, divide the returned
103         * value by 2.</p>
104         * <p>
105         * This test is equivalent to a one-sample t-test computed using
106         * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
107         * array consisting of the signed differences between corresponding elements of
108         * <code>sample1</code> and <code>sample2.</code></p>
109         * <p>
110         * <strong>Usage Note:</strong><br>
111         * The validity of the p-value depends on the assumptions of the parametric
112         * t-test procedure, as discussed
113         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
114         * here</a></p>
115         * <p>
116         * <strong>Preconditions</strong>: <ul>
117         * <li>The input array lengths must be the same and their common length must
118         * be at least 2.
119         * </li></ul></p>
120         *
121         * @param sample1 array of sample data values
122         * @param sample2 array of sample data values
123         * @return p-value for t-test
124         * @throws NullArgumentException if the arrays are <code>null</code>
125         * @throws NoDataException if the arrays are empty
126         * @throws DimensionMismatchException if the length of the arrays is not equal
127         * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
128         * @throws MaxCountExceededException if an error occurs computing the p-value
129         */
130        public double pairedTTest(final double[] sample1, final double[] sample2)
131            throws NullArgumentException, NoDataException, DimensionMismatchException,
132            NumberIsTooSmallException, MaxCountExceededException {
133    
134            double meanDifference = StatUtils.meanDifference(sample1, sample2);
135            return tTest(meanDifference, 0,
136                    StatUtils.varianceDifference(sample1, sample2, meanDifference),
137                    sample1.length);
138    
139        }
140    
141        /**
142         * Performs a paired t-test evaluating the null hypothesis that the
143         * mean of the paired differences between <code>sample1</code> and
144         * <code>sample2</code> is 0 in favor of the two-sided alternative that the
145         * mean paired difference is not equal to 0, with significance level
146         * <code>alpha</code>.
147         * <p>
148         * Returns <code>true</code> iff the null hypothesis can be rejected with
149         * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
150         * <code>alpha * 2</code></p>
151         * <p>
152         * <strong>Usage Note:</strong><br>
153         * The validity of the test depends on the assumptions of the parametric
154         * t-test procedure, as discussed
155         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
156         * here</a></p>
157         * <p>
158         * <strong>Preconditions</strong>: <ul>
159         * <li>The input array lengths must be the same and their common length
160         * must be at least 2.
161         * </li>
162         * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
163         * </li></ul></p>
164         *
165         * @param sample1 array of sample data values
166         * @param sample2 array of sample data values
167         * @param alpha significance level of the test
168         * @return true if the null hypothesis can be rejected with
169         * confidence 1 - alpha
170         * @throws NullArgumentException if the arrays are <code>null</code>
171         * @throws NoDataException if the arrays are empty
172         * @throws DimensionMismatchException if the length of the arrays is not equal
173         * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
174         * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
175         * @throws MaxCountExceededException if an error occurs computing the p-value
176         */
177        public boolean pairedTTest(final double[] sample1, final double[] sample2,
178                                   final double alpha)
179            throws NullArgumentException, NoDataException, DimensionMismatchException,
180            NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException {
181    
182            checkSignificanceLevel(alpha);
183            return pairedTTest(sample1, sample2) < alpha;
184    
185        }
186    
187        /**
188         * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
189         * t statistic </a> given observed values and a comparison constant.
190         * <p>
191         * This statistic can be used to perform a one sample t-test for the mean.
192         * </p><p>
193         * <strong>Preconditions</strong>: <ul>
194         * <li>The observed array length must be at least 2.
195         * </li></ul></p>
196         *
197         * @param mu comparison constant
198         * @param observed array of values
199         * @return t statistic
200         * @throws NullArgumentException if <code>observed</code> is <code>null</code>
201         * @throws NumberIsTooSmallException if the length of <code>observed</code> is &lt; 2
202         */
203        public double t(final double mu, final double[] observed)
204            throws NullArgumentException, NumberIsTooSmallException {
205    
206            checkSampleData(observed);
207            // No try-catch or advertised exception because args have just been checked
208            return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
209                    observed.length);
210    
211        }
212    
213        /**
214         * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
215         * t statistic </a> to use in comparing the mean of the dataset described by
216         * <code>sampleStats</code> to <code>mu</code>.
217         * <p>
218         * This statistic can be used to perform a one sample t-test for the mean.
219         * </p><p>
220         * <strong>Preconditions</strong>: <ul>
221         * <li><code>observed.getN() &ge; 2</code>.
222         * </li></ul></p>
223         *
224         * @param mu comparison constant
225         * @param sampleStats DescriptiveStatistics holding sample summary statitstics
226         * @return t statistic
227         * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
228         * @throws NumberIsTooSmallException if the number of samples is &lt; 2
229         */
230        public double t(final double mu, final StatisticalSummary sampleStats)
231            throws NullArgumentException, NumberIsTooSmallException {
232    
233            checkSampleData(sampleStats);
234            return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
235                     sampleStats.getN());
236    
237        }
238    
239        /**
240         * Computes a 2-sample t statistic,  under the hypothesis of equal
241         * subpopulation variances.  To compute a t-statistic without the
242         * equal variances hypothesis, use {@link #t(double[], double[])}.
243         * <p>
244         * This statistic can be used to perform a (homoscedastic) two-sample
245         * t-test to compare sample means.</p>
246         * <p>
247         * The t-statistic is</p>
248         * <p>
249         * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
250         * </p><p>
251         * where <strong><code>n1</code></strong> is the size of first sample;
252         * <strong><code> n2</code></strong> is the size of second sample;
253         * <strong><code> m1</code></strong> is the mean of first sample;
254         * <strong><code> m2</code></strong> is the mean of second sample</li>
255         * </ul>
256         * and <strong><code>var</code></strong> is the pooled variance estimate:
257         * </p><p>
258         * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
259         * </p><p>
260         * with <strong><code>var1</code></strong> the variance of the first sample and
261         * <strong><code>var2</code></strong> the variance of the second sample.
262         * </p><p>
263         * <strong>Preconditions</strong>: <ul>
264         * <li>The observed array lengths must both be at least 2.
265         * </li></ul></p>
266         *
267         * @param sample1 array of sample data values
268         * @param sample2 array of sample data values
269         * @return t statistic
270         * @throws NullArgumentException if the arrays are <code>null</code>
271         * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
272         */
273        public double homoscedasticT(final double[] sample1, final double[] sample2)
274            throws NullArgumentException, NumberIsTooSmallException {
275    
276            checkSampleData(sample1);
277            checkSampleData(sample2);
278            // No try-catch or advertised exception because args have just been checked
279            return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
280                                  StatUtils.variance(sample1), StatUtils.variance(sample2),
281                                  sample1.length, sample2.length);
282    
283        }
284    
285        /**
286         * Computes a 2-sample t statistic, without the hypothesis of equal
287         * subpopulation variances.  To compute a t-statistic assuming equal
288         * variances, use {@link #homoscedasticT(double[], double[])}.
289         * <p>
290         * This statistic can be used to perform a two-sample t-test to compare
291         * sample means.</p>
292         * <p>
293         * The t-statistic is</p>
294         * <p>
295         * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
296         * </p><p>
297         *  where <strong><code>n1</code></strong> is the size of the first sample
298         * <strong><code> n2</code></strong> is the size of the second sample;
299         * <strong><code> m1</code></strong> is the mean of the first sample;
300         * <strong><code> m2</code></strong> is the mean of the second sample;
301         * <strong><code> var1</code></strong> is the variance of the first sample;
302         * <strong><code> var2</code></strong> is the variance of the second sample;
303         * </p><p>
304         * <strong>Preconditions</strong>: <ul>
305         * <li>The observed array lengths must both be at least 2.
306         * </li></ul></p>
307         *
308         * @param sample1 array of sample data values
309         * @param sample2 array of sample data values
310         * @return t statistic
311         * @throws NullArgumentException if the arrays are <code>null</code>
312         * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
313         */
314        public double t(final double[] sample1, final double[] sample2)
315            throws NullArgumentException, NumberIsTooSmallException {
316    
317            checkSampleData(sample1);
318            checkSampleData(sample2);
319            // No try-catch or advertised exception because args have just been checked
320            return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
321                     StatUtils.variance(sample1), StatUtils.variance(sample2),
322                     sample1.length, sample2.length);
323    
324        }
325    
326        /**
327         * Computes a 2-sample t statistic </a>, comparing the means of the datasets
328         * described by two {@link StatisticalSummary} instances, without the
329         * assumption of equal subpopulation variances.  Use
330         * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
331         * compute a t-statistic under the equal variances assumption.
332         * <p>
333         * This statistic can be used to perform a two-sample t-test to compare
334         * sample means.</p>
335         * <p>
336          * The returned  t-statistic is</p>
337         * <p>
338         * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
339         * </p><p>
340         * where <strong><code>n1</code></strong> is the size of the first sample;
341         * <strong><code> n2</code></strong> is the size of the second sample;
342         * <strong><code> m1</code></strong> is the mean of the first sample;
343         * <strong><code> m2</code></strong> is the mean of the second sample
344         * <strong><code> var1</code></strong> is the variance of the first sample;
345         * <strong><code> var2</code></strong> is the variance of the second sample
346         * </p><p>
347         * <strong>Preconditions</strong>: <ul>
348         * <li>The datasets described by the two Univariates must each contain
349         * at least 2 observations.
350         * </li></ul></p>
351         *
352         * @param sampleStats1 StatisticalSummary describing data from the first sample
353         * @param sampleStats2 StatisticalSummary describing data from the second sample
354         * @return t statistic
355         * @throws NullArgumentException if the sample statistics are <code>null</code>
356         * @throws NumberIsTooSmallException if the number of samples is &lt; 2
357         */
358        public double t(final StatisticalSummary sampleStats1,
359                        final StatisticalSummary sampleStats2)
360            throws NullArgumentException, NumberIsTooSmallException {
361    
362            checkSampleData(sampleStats1);
363            checkSampleData(sampleStats2);
364            return t(sampleStats1.getMean(), sampleStats2.getMean(),
365                     sampleStats1.getVariance(), sampleStats2.getVariance(),
366                     sampleStats1.getN(), sampleStats2.getN());
367    
368        }
369    
370        /**
371         * Computes a 2-sample t statistic, comparing the means of the datasets
372         * described by two {@link StatisticalSummary} instances, under the
373         * assumption of equal subpopulation variances.  To compute a t-statistic
374         * without the equal variances assumption, use
375         * {@link #t(StatisticalSummary, StatisticalSummary)}.
376         * <p>
377         * This statistic can be used to perform a (homoscedastic) two-sample
378         * t-test to compare sample means.</p>
379         * <p>
380         * The t-statistic returned is</p>
381         * <p>
382         * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
383         * </p><p>
384         * where <strong><code>n1</code></strong> is the size of first sample;
385         * <strong><code> n2</code></strong> is the size of second sample;
386         * <strong><code> m1</code></strong> is the mean of first sample;
387         * <strong><code> m2</code></strong> is the mean of second sample
388         * and <strong><code>var</code></strong> is the pooled variance estimate:
389         * </p><p>
390         * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
391         * </p><p>
392         * with <strong><code>var1</code></strong> the variance of the first sample and
393         * <strong><code>var2</code></strong> the variance of the second sample.
394         * </p><p>
395         * <strong>Preconditions</strong>: <ul>
396         * <li>The datasets described by the two Univariates must each contain
397         * at least 2 observations.
398         * </li></ul></p>
399         *
400         * @param sampleStats1 StatisticalSummary describing data from the first sample
401         * @param sampleStats2 StatisticalSummary describing data from the second sample
402         * @return t statistic
403         * @throws NullArgumentException if the sample statistics are <code>null</code>
404         * @throws NumberIsTooSmallException if the number of samples is &lt; 2
405         */
406        public double homoscedasticT(final StatisticalSummary sampleStats1,
407                                     final StatisticalSummary sampleStats2)
408            throws NullArgumentException, NumberIsTooSmallException {
409    
410            checkSampleData(sampleStats1);
411            checkSampleData(sampleStats2);
412            return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
413                                  sampleStats1.getVariance(), sampleStats2.getVariance(),
414                                  sampleStats1.getN(), sampleStats2.getN());
415    
416        }
417    
418        /**
419         * Returns the <i>observed significance level</i>, or
420         * <i>p-value</i>, associated with a one-sample, two-tailed t-test
421         * comparing the mean of the input array with the constant <code>mu</code>.
422         * <p>
423         * The number returned is the smallest significance level
424         * at which one can reject the null hypothesis that the mean equals
425         * <code>mu</code> in favor of the two-sided alternative that the mean
426         * is different from <code>mu</code>. For a one-sided test, divide the
427         * returned value by 2.</p>
428         * <p>
429         * <strong>Usage Note:</strong><br>
430         * The validity of the test depends on the assumptions of the parametric
431         * t-test procedure, as discussed
432         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
433         * </p><p>
434         * <strong>Preconditions</strong>: <ul>
435         * <li>The observed array length must be at least 2.
436         * </li></ul></p>
437         *
438         * @param mu constant value to compare sample mean against
439         * @param sample array of sample data values
440         * @return p-value
441         * @throws NullArgumentException if the sample array is <code>null</code>
442         * @throws NumberIsTooSmallException if the length of the array is &lt; 2
443         * @throws MaxCountExceededException if an error occurs computing the p-value
444         */
445        public double tTest(final double mu, final double[] sample)
446            throws NullArgumentException, NumberIsTooSmallException,
447            MaxCountExceededException {
448    
449            checkSampleData(sample);
450            // No try-catch or advertised exception because args have just been checked
451            return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample),
452                         sample.length);
453    
454        }
455    
456        /**
457         * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
458         * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
459         * which <code>sample</code> is drawn equals <code>mu</code>.
460         * <p>
461         * Returns <code>true</code> iff the null hypothesis can be
462         * rejected with confidence <code>1 - alpha</code>.  To
463         * perform a 1-sided test, use <code>alpha * 2</code></p>
464         * <p>
465         * <strong>Examples:</strong><br><ol>
466         * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
467         * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
468         * </li>
469         * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
470         * at the 99% level, first verify that the measured sample mean is less
471         * than <code>mu</code> and then use
472         * <br><code>tTest(mu, sample, 0.02) </code>
473         * </li></ol></p>
474         * <p>
475         * <strong>Usage Note:</strong><br>
476         * The validity of the test depends on the assumptions of the one-sample
477         * parametric t-test procedure, as discussed
478         * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
479         * </p><p>
480         * <strong>Preconditions</strong>: <ul>
481         * <li>The observed array length must be at least 2.
482         * </li></ul></p>
483         *
484         * @param mu constant value to compare sample mean against
485         * @param sample array of sample data values
486         * @param alpha significance level of the test
487         * @return p-value
488         * @throws NullArgumentException if the sample array is <code>null</code>
489         * @throws NumberIsTooSmallException if the length of the array is &lt; 2
490         * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
491         * @throws MaxCountExceededException if an error computing the p-value
492         */
493        public boolean tTest(final double mu, final double[] sample, final double alpha)
494            throws NullArgumentException, NumberIsTooSmallException,
495            OutOfRangeException, MaxCountExceededException {
496    
497            checkSignificanceLevel(alpha);
498            return tTest(mu, sample) < alpha;
499    
500        }
501    
502        /**
503         * Returns the <i>observed significance level</i>, or
504         * <i>p-value</i>, associated with a one-sample, two-tailed t-test
505         * comparing the mean of the dataset described by <code>sampleStats</code>
506         * with the constant <code>mu</code>.
507         * <p>
508         * The number returned is the smallest significance level
509         * at which one can reject the null hypothesis that the mean equals
510         * <code>mu</code> in favor of the two-sided alternative that the mean
511         * is different from <code>mu</code>. For a one-sided test, divide the
512         * returned value by 2.</p>
513         * <p>
514         * <strong>Usage Note:</strong><br>
515         * The validity of the test depends on the assumptions of the parametric
516         * t-test procedure, as discussed
517         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
518         * here</a></p>
519         * <p>
520         * <strong>Preconditions</strong>: <ul>
521         * <li>The sample must contain at least 2 observations.
522         * </li></ul></p>
523         *
524         * @param mu constant value to compare sample mean against
525         * @param sampleStats StatisticalSummary describing sample data
526         * @return p-value
527         * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
528         * @throws NumberIsTooSmallException if the number of samples is &lt; 2
529         * @throws MaxCountExceededException if an error occurs computing the p-value
530         */
531        public double tTest(final double mu, final StatisticalSummary sampleStats)
532            throws NullArgumentException, NumberIsTooSmallException,
533            MaxCountExceededException {
534    
535            checkSampleData(sampleStats);
536            return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
537                         sampleStats.getN());
538    
539        }
540    
541        /**
542         * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
543         * two-sided t-test</a> evaluating the null hypothesis that the mean of the
544         * population from which the dataset described by <code>stats</code> is
545         * drawn equals <code>mu</code>.
546         * <p>
547         * Returns <code>true</code> iff the null hypothesis can be rejected with
548         * confidence <code>1 - alpha</code>.  To  perform a 1-sided test, use
549         * <code>alpha * 2.</code></p>
550         * <p>
551         * <strong>Examples:</strong><br><ol>
552         * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
553         * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
554         * </li>
555         * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
556         * at the 99% level, first verify that the measured sample mean is less
557         * than <code>mu</code> and then use
558         * <br><code>tTest(mu, sampleStats, 0.02) </code>
559         * </li></ol></p>
560         * <p>
561         * <strong>Usage Note:</strong><br>
562         * The validity of the test depends on the assumptions of the one-sample
563         * parametric t-test procedure, as discussed
564         * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
565         * </p><p>
566         * <strong>Preconditions</strong>: <ul>
567         * <li>The sample must include at least 2 observations.
568         * </li></ul></p>
569         *
570         * @param mu constant value to compare sample mean against
571         * @param sampleStats StatisticalSummary describing sample data values
572         * @param alpha significance level of the test
573         * @return p-value
574         * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
575         * @throws NumberIsTooSmallException if the number of samples is &lt; 2
576         * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
577         * @throws MaxCountExceededException if an error occurs computing the p-value
578         */
579        public boolean tTest(final double mu, final StatisticalSummary sampleStats,
580                             final double alpha)
581        throws NullArgumentException, NumberIsTooSmallException,
582        OutOfRangeException, MaxCountExceededException {
583    
584            checkSignificanceLevel(alpha);
585            return tTest(mu, sampleStats) < alpha;
586    
587        }
588    
589        /**
590         * Returns the <i>observed significance level</i>, or
591         * <i>p-value</i>, associated with a two-sample, two-tailed t-test
592         * comparing the means of the input arrays.
593         * <p>
594         * The number returned is the smallest significance level
595         * at which one can reject the null hypothesis that the two means are
596         * equal in favor of the two-sided alternative that they are different.
597         * For a one-sided test, divide the returned value by 2.</p>
598         * <p>
599         * The test does not assume that the underlying popuation variances are
600         * equal  and it uses approximated degrees of freedom computed from the
601         * sample data to compute the p-value.  The t-statistic used is as defined in
602         * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
603         * to the degrees of freedom is used,
604         * as described
605         * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
606         * here.</a>  To perform the test under the assumption of equal subpopulation
607         * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
608         * <p>
609         * <strong>Usage Note:</strong><br>
610         * The validity of the p-value depends on the assumptions of the parametric
611         * t-test procedure, as discussed
612         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
613         * here</a></p>
614         * <p>
615         * <strong>Preconditions</strong>: <ul>
616         * <li>The observed array lengths must both be at least 2.
617         * </li></ul></p>
618         *
619         * @param sample1 array of sample data values
620         * @param sample2 array of sample data values
621         * @return p-value for t-test
622         * @throws NullArgumentException if the arrays are <code>null</code>
623         * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
624         * @throws MaxCountExceededException if an error occurs computing the p-value
625         */
626        public double tTest(final double[] sample1, final double[] sample2)
627            throws NullArgumentException, NumberIsTooSmallException,
628            MaxCountExceededException {
629    
630            checkSampleData(sample1);
631            checkSampleData(sample2);
632            // No try-catch or advertised exception because args have just been checked
633            return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
634                         StatUtils.variance(sample1), StatUtils.variance(sample2),
635                         sample1.length, sample2.length);
636    
637        }
638    
639        /**
640         * Returns the <i>observed significance level</i>, or
641         * <i>p-value</i>, associated with a two-sample, two-tailed t-test
642         * comparing the means of the input arrays, under the assumption that
643         * the two samples are drawn from subpopulations with equal variances.
644         * To perform the test without the equal variances assumption, use
645         * {@link #tTest(double[], double[])}.</p>
646         * <p>
647         * The number returned is the smallest significance level
648         * at which one can reject the null hypothesis that the two means are
649         * equal in favor of the two-sided alternative that they are different.
650         * For a one-sided test, divide the returned value by 2.</p>
651         * <p>
652         * A pooled variance estimate is used to compute the t-statistic.  See
653         * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
654         * minus 2 is used as the degrees of freedom.</p>
655         * <p>
656         * <strong>Usage Note:</strong><br>
657         * The validity of the p-value depends on the assumptions of the parametric
658         * t-test procedure, as discussed
659         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
660         * here</a></p>
661         * <p>
662         * <strong>Preconditions</strong>: <ul>
663         * <li>The observed array lengths must both be at least 2.
664         * </li></ul></p>
665         *
666         * @param sample1 array of sample data values
667         * @param sample2 array of sample data values
668         * @return p-value for t-test
669         * @throws NullArgumentException if the arrays are <code>null</code>
670         * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
671         * @throws MaxCountExceededException if an error occurs computing the p-value
672         */
673        public double homoscedasticTTest(final double[] sample1, final double[] sample2)
674            throws NullArgumentException, NumberIsTooSmallException,
675            MaxCountExceededException {
676    
677            checkSampleData(sample1);
678            checkSampleData(sample2);
679            // No try-catch or advertised exception because args have just been checked
680            return homoscedasticTTest(StatUtils.mean(sample1),
681                                      StatUtils.mean(sample2),
682                                      StatUtils.variance(sample1),
683                                      StatUtils.variance(sample2),
684                                      sample1.length, sample2.length);
685    
686        }
687    
688        /**
689         * Performs a
690         * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
691         * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
692         * and <code>sample2</code> are drawn from populations with the same mean,
693         * with significance level <code>alpha</code>.  This test does not assume
694         * that the subpopulation variances are equal.  To perform the test assuming
695         * equal variances, use
696         * {@link #homoscedasticTTest(double[], double[], double)}.
697         * <p>
698         * Returns <code>true</code> iff the null hypothesis that the means are
699         * equal can be rejected with confidence <code>1 - alpha</code>.  To
700         * perform a 1-sided test, use <code>alpha * 2</code></p>
701         * <p>
702         * See {@link #t(double[], double[])} for the formula used to compute the
703         * t-statistic.  Degrees of freedom are approximated using the
704         * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
705         * Welch-Satterthwaite approximation.</a></p>
706         * <p>
707         * <strong>Examples:</strong><br><ol>
708         * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
709         * the 95% level,  use
710         * <br><code>tTest(sample1, sample2, 0.05). </code>
711         * </li>
712         * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>,
713         * at the 99% level, first verify that the measured  mean of <code>sample 1</code>
714         * is less than the mean of <code>sample 2</code> and then use
715         * <br><code>tTest(sample1, sample2, 0.02) </code>
716         * </li></ol></p>
717         * <p>
718         * <strong>Usage Note:</strong><br>
719         * The validity of the test depends on the assumptions of the parametric
720         * t-test procedure, as discussed
721         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
722         * here</a></p>
723         * <p>
724         * <strong>Preconditions</strong>: <ul>
725         * <li>The observed array lengths must both be at least 2.
726         * </li>
727         * <li> <code> 0 < alpha < 0.5 </code>
728         * </li></ul></p>
729         *
730         * @param sample1 array of sample data values
731         * @param sample2 array of sample data values
732         * @param alpha significance level of the test
733         * @return true if the null hypothesis can be rejected with
734         * confidence 1 - alpha
735         * @throws NullArgumentException if the arrays are <code>null</code>
736         * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
737         * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
738         * @throws MaxCountExceededException if an error occurs computing the p-value
739         */
740        public boolean tTest(final double[] sample1, final double[] sample2,
741                             final double alpha)
742            throws NullArgumentException, NumberIsTooSmallException,
743            OutOfRangeException, MaxCountExceededException {
744    
745            checkSignificanceLevel(alpha);
746            return tTest(sample1, sample2) < alpha;
747    
748        }
749    
750        /**
751         * Performs a
752         * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
753         * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
754         * and <code>sample2</code> are drawn from populations with the same mean,
755         * with significance level <code>alpha</code>,  assuming that the
756         * subpopulation variances are equal.  Use
757         * {@link #tTest(double[], double[], double)} to perform the test without
758         * the assumption of equal variances.
759         * <p>
760         * Returns <code>true</code> iff the null hypothesis that the means are
761         * equal can be rejected with confidence <code>1 - alpha</code>.  To
762         * perform a 1-sided test, use <code>alpha * 2.</code>  To perform the test
763         * without the assumption of equal subpopulation variances, use
764         * {@link #tTest(double[], double[], double)}.</p>
765         * <p>
766         * A pooled variance estimate is used to compute the t-statistic. See
767         * {@link #t(double[], double[])} for the formula. The sum of the sample
768         * sizes minus 2 is used as the degrees of freedom.</p>
769         * <p>
770         * <strong>Examples:</strong><br><ol>
771         * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
772         * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
773         * </li>
774         * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
775         * at the 99% level, first verify that the measured mean of
776         * <code>sample 1</code> is less than the mean of <code>sample 2</code>
777         * and then use
778         * <br><code>tTest(sample1, sample2, 0.02) </code>
779         * </li></ol></p>
780         * <p>
781         * <strong>Usage Note:</strong><br>
782         * The validity of the test depends on the assumptions of the parametric
783         * t-test procedure, as discussed
784         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
785         * here</a></p>
786         * <p>
787         * <strong>Preconditions</strong>: <ul>
788         * <li>The observed array lengths must both be at least 2.
789         * </li>
790         * <li> <code> 0 < alpha < 0.5 </code>
791         * </li></ul></p>
792         *
793         * @param sample1 array of sample data values
794         * @param sample2 array of sample data values
795         * @param alpha significance level of the test
796         * @return true if the null hypothesis can be rejected with
797         * confidence 1 - alpha
798         * @throws NullArgumentException if the arrays are <code>null</code>
799         * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
800         * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
801         * @throws MaxCountExceededException if an error occurs computing the p-value
802         */
803        public boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
804                                          final double alpha)
805            throws NullArgumentException, NumberIsTooSmallException,
806            OutOfRangeException, MaxCountExceededException {
807    
808            checkSignificanceLevel(alpha);
809            return homoscedasticTTest(sample1, sample2) < alpha;
810    
811        }
812    
813        /**
814         * Returns the <i>observed significance level</i>, or
815         * <i>p-value</i>, associated with a two-sample, two-tailed t-test
816         * comparing the means of the datasets described by two StatisticalSummary
817         * instances.
818         * <p>
819         * The number returned is the smallest significance level
820         * at which one can reject the null hypothesis that the two means are
821         * equal in favor of the two-sided alternative that they are different.
822         * For a one-sided test, divide the returned value by 2.</p>
823         * <p>
824         * The test does not assume that the underlying population variances are
825         * equal  and it uses approximated degrees of freedom computed from the
826         * sample data to compute the p-value.   To perform the test assuming
827         * equal variances, use
828         * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
829         * <p>
830         * <strong>Usage Note:</strong><br>
831         * The validity of the p-value depends on the assumptions of the parametric
832         * t-test procedure, as discussed
833         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
834         * here</a></p>
835         * <p>
836         * <strong>Preconditions</strong>: <ul>
837         * <li>The datasets described by the two Univariates must each contain
838         * at least 2 observations.
839         * </li></ul></p>
840         *
841         * @param sampleStats1  StatisticalSummary describing data from the first sample
842         * @param sampleStats2  StatisticalSummary describing data from the second sample
843         * @return p-value for t-test
844         * @throws NullArgumentException if the sample statistics are <code>null</code>
845         * @throws NumberIsTooSmallException if the number of samples is &lt; 2
846         * @throws MaxCountExceededException if an error occurs computing the p-value
847         */
848        public double tTest(final StatisticalSummary sampleStats1,
849                            final StatisticalSummary sampleStats2)
850            throws NullArgumentException, NumberIsTooSmallException,
851            MaxCountExceededException {
852    
853            checkSampleData(sampleStats1);
854            checkSampleData(sampleStats2);
855            return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
856                         sampleStats1.getVariance(), sampleStats2.getVariance(),
857                         sampleStats1.getN(), sampleStats2.getN());
858    
859        }
860    
861        /**
862         * Returns the <i>observed significance level</i>, or
863         * <i>p-value</i>, associated with a two-sample, two-tailed t-test
864         * comparing the means of the datasets described by two StatisticalSummary
865         * instances, under the hypothesis of equal subpopulation variances. To
866         * perform a test without the equal variances assumption, use
867         * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
868         * <p>
869         * The number returned is the smallest significance level
870         * at which one can reject the null hypothesis that the two means are
871         * equal in favor of the two-sided alternative that they are different.
872         * For a one-sided test, divide the returned value by 2.</p>
873         * <p>
874         * See {@link #homoscedasticT(double[], double[])} for the formula used to
875         * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
876         * the degrees of freedom.</p>
877         * <p>
878         * <strong>Usage Note:</strong><br>
879         * The validity of the p-value depends on the assumptions of the parametric
880         * t-test procedure, as discussed
881         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
882         * </p><p>
883         * <strong>Preconditions</strong>: <ul>
884         * <li>The datasets described by the two Univariates must each contain
885         * at least 2 observations.
886         * </li></ul></p>
887         *
888         * @param sampleStats1  StatisticalSummary describing data from the first sample
889         * @param sampleStats2  StatisticalSummary describing data from the second sample
890         * @return p-value for t-test
891         * @throws NullArgumentException if the sample statistics are <code>null</code>
892         * @throws NumberIsTooSmallException if the number of samples is &lt; 2
893         * @throws MaxCountExceededException if an error occurs computing the p-value
894         */
895        public double homoscedasticTTest(final StatisticalSummary sampleStats1,
896                                         final StatisticalSummary sampleStats2)
897            throws NullArgumentException, NumberIsTooSmallException,
898            MaxCountExceededException {
899    
900            checkSampleData(sampleStats1);
901            checkSampleData(sampleStats2);
902            return homoscedasticTTest(sampleStats1.getMean(),
903                                      sampleStats2.getMean(),
904                                      sampleStats1.getVariance(),
905                                      sampleStats2.getVariance(),
906                                      sampleStats1.getN(), sampleStats2.getN());
907    
908        }
909    
910        /**
911         * Performs a
912         * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
913         * two-sided t-test</a> evaluating the null hypothesis that
914         * <code>sampleStats1</code> and <code>sampleStats2</code> describe
915         * datasets drawn from populations with the same mean, with significance
916         * level <code>alpha</code>.   This test does not assume that the
917         * subpopulation variances are equal.  To perform the test under the equal
918         * variances assumption, use
919         * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
920         * <p>
921         * Returns <code>true</code> iff the null hypothesis that the means are
922         * equal can be rejected with confidence <code>1 - alpha</code>.  To
923         * perform a 1-sided test, use <code>alpha * 2</code></p>
924         * <p>
925         * See {@link #t(double[], double[])} for the formula used to compute the
926         * t-statistic.  Degrees of freedom are approximated using the
927         * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
928         * Welch-Satterthwaite approximation.</a></p>
929         * <p>
930         * <strong>Examples:</strong><br><ol>
931         * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
932         * the 95%, use
933         * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
934         * </li>
935         * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
936         * at the 99% level,  first verify that the measured mean of
937         * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
938         * and then use
939         * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
940         * </li></ol></p>
941         * <p>
942         * <strong>Usage Note:</strong><br>
943         * The validity of the test depends on the assumptions of the parametric
944         * t-test procedure, as discussed
945         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
946         * here</a></p>
947         * <p>
948         * <strong>Preconditions</strong>: <ul>
949         * <li>The datasets described by the two Univariates must each contain
950         * at least 2 observations.
951         * </li>
952         * <li> <code> 0 < alpha < 0.5 </code>
953         * </li></ul></p>
954         *
955         * @param sampleStats1 StatisticalSummary describing sample data values
956         * @param sampleStats2 StatisticalSummary describing sample data values
957         * @param alpha significance level of the test
958         * @return true if the null hypothesis can be rejected with
959         * confidence 1 - alpha
960         * @throws NullArgumentException if the sample statistics are <code>null</code>
961         * @throws NumberIsTooSmallException if the number of samples is &lt; 2
962         * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
963         * @throws MaxCountExceededException if an error occurs computing the p-value
964         */
965        public boolean tTest(final StatisticalSummary sampleStats1,
966                             final StatisticalSummary sampleStats2,
967                             final double alpha)
968            throws NullArgumentException, NumberIsTooSmallException,
969            OutOfRangeException, MaxCountExceededException {
970    
971            checkSignificanceLevel(alpha);
972            return tTest(sampleStats1, sampleStats2) < alpha;
973    
974        }
975    
976        //----------------------------------------------- Protected methods
977    
978        /**
979         * Computes approximate degrees of freedom for 2-sample t-test.
980         *
981         * @param v1 first sample variance
982         * @param v2 second sample variance
983         * @param n1 first sample n
984         * @param n2 second sample n
985         * @return approximate degrees of freedom
986         */
987        protected double df(double v1, double v2, double n1, double n2) {
988            return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
989            ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
990                    (n2 * n2 * (n2 - 1d)));
991        }
992    
993        /**
994         * Computes t test statistic for 1-sample t-test.
995         *
996         * @param m sample mean
997         * @param mu constant to test against
998         * @param v sample variance
999         * @param n sample n
1000         * @return t test statistic
1001         */
1002        protected double t(final double m, final double mu,
1003                           final double v, final double n) {
1004            return (m - mu) / FastMath.sqrt(v / n);
1005        }
1006    
1007        /**
1008         * Computes t test statistic for 2-sample t-test.
1009         * <p>
1010         * Does not assume that subpopulation variances are equal.</p>
1011         *
1012         * @param m1 first sample mean
1013         * @param m2 second sample mean
1014         * @param v1 first sample variance
1015         * @param v2 second sample variance
1016         * @param n1 first sample n
1017         * @param n2 second sample n
1018         * @return t test statistic
1019         */
1020        protected double t(final double m1, final double m2,
1021                           final double v1, final double v2,
1022                           final double n1, final double n2)  {
1023            return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2));
1024        }
1025    
1026        /**
1027         * Computes t test statistic for 2-sample t-test under the hypothesis
1028         * of equal subpopulation variances.
1029         *
1030         * @param m1 first sample mean
1031         * @param m2 second sample mean
1032         * @param v1 first sample variance
1033         * @param v2 second sample variance
1034         * @param n1 first sample n
1035         * @param n2 second sample n
1036         * @return t test statistic
1037         */
1038        protected double homoscedasticT(final double m1, final double m2,
1039                                        final double v1, final double v2,
1040                                        final double n1, final double n2)  {
1041            final double pooledVariance = ((n1  - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
1042            return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
1043        }
1044    
1045        /**
1046         * Computes p-value for 2-sided, 1-sample t-test.
1047         *
1048         * @param m sample mean
1049         * @param mu constant to test against
1050         * @param v sample variance
1051         * @param n sample n
1052         * @return p-value
1053         * @throws MaxCountExceededException if an error occurs computing the p-value
1054         * @throws MathIllegalArgumentException if n is not greater than 1
1055         */
1056        protected double tTest(final double m, final double mu,
1057                               final double v, final double n)
1058            throws MaxCountExceededException, MathIllegalArgumentException {
1059    
1060            double t = FastMath.abs(t(m, mu, v, n));
1061            TDistribution distribution = new TDistribution(n - 1);
1062            return 2.0 * distribution.cumulativeProbability(-t);
1063    
1064        }
1065    
1066        /**
1067         * Computes p-value for 2-sided, 2-sample t-test.
1068         * <p>
1069         * Does not assume subpopulation variances are equal. Degrees of freedom
1070         * are estimated from the data.</p>
1071         *
1072         * @param m1 first sample mean
1073         * @param m2 second sample mean
1074         * @param v1 first sample variance
1075         * @param v2 second sample variance
1076         * @param n1 first sample n
1077         * @param n2 second sample n
1078         * @return p-value
1079         * @throws MaxCountExceededException if an error occurs computing the p-value
1080         * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1081         * strictly positive
1082         */
1083        protected double tTest(final double m1, final double m2,
1084                               final double v1, final double v2,
1085                               final double n1, final double n2)
1086            throws MaxCountExceededException, NotStrictlyPositiveException {
1087    
1088            final double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2));
1089            final double degreesOfFreedom = df(v1, v2, n1, n2);
1090            TDistribution distribution = new TDistribution(degreesOfFreedom);
1091            return 2.0 * distribution.cumulativeProbability(-t);
1092    
1093        }
1094    
1095        /**
1096         * Computes p-value for 2-sided, 2-sample t-test, under the assumption
1097         * of equal subpopulation variances.
1098         * <p>
1099         * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
1100         *
1101         * @param m1 first sample mean
1102         * @param m2 second sample mean
1103         * @param v1 first sample variance
1104         * @param v2 second sample variance
1105         * @param n1 first sample n
1106         * @param n2 second sample n
1107         * @return p-value
1108         * @throws MaxCountExceededException if an error occurs computing the p-value
1109         * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1110         * strictly positive
1111         */
1112        protected double homoscedasticTTest(double m1, double m2,
1113                                            double v1, double v2,
1114                                            double n1, double n2)
1115            throws MaxCountExceededException, NotStrictlyPositiveException {
1116    
1117            final double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1118            final double degreesOfFreedom = n1 + n2 - 2;
1119            TDistribution distribution = new TDistribution(degreesOfFreedom);
1120            return 2.0 * distribution.cumulativeProbability(-t);
1121    
1122        }
1123    
1124        /**
1125         * Check significance level.
1126         *
1127         * @param alpha significance level
1128         * @throws OutOfRangeException if the significance level is out of bounds.
1129         */
1130        private void checkSignificanceLevel(final double alpha)
1131            throws OutOfRangeException {
1132    
1133            if (alpha <= 0 || alpha > 0.5) {
1134                throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
1135                                              alpha, 0.0, 0.5);
1136            }
1137    
1138        }
1139    
1140        /**
1141         * Check sample data.
1142         *
1143         * @param data Sample data.
1144         * @throws NullArgumentException if {@code data} is {@code null}.
1145         * @throws NumberIsTooSmallException if there is not enough sample data.
1146         */
1147        private void checkSampleData(final double[] data)
1148            throws NullArgumentException, NumberIsTooSmallException {
1149    
1150            if (data == null) {
1151                throw new NullArgumentException();
1152            }
1153            if (data.length < 2) {
1154                throw new NumberIsTooSmallException(
1155                        LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1156                        data.length, 2, true);
1157            }
1158    
1159        }
1160    
1161        /**
1162         * Check sample data.
1163         *
1164         * @param stat Statistical summary.
1165         * @throws NullArgumentException if {@code data} is {@code null}.
1166         * @throws NumberIsTooSmallException if there is not enough sample data.
1167         */
1168        private void checkSampleData(final StatisticalSummary stat)
1169            throws NullArgumentException, NumberIsTooSmallException {
1170    
1171            if (stat == null) {
1172                throw new NullArgumentException();
1173            }
1174            if (stat.getN() < 2) {
1175                throw new NumberIsTooSmallException(
1176                        LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1177                        stat.getN(), 2, true);
1178            }
1179    
1180        }
1181    
1182    }