001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math3.stat.inference; 018 019 import org.apache.commons.math3.distribution.TDistribution; 020 import org.apache.commons.math3.exception.DimensionMismatchException; 021 import org.apache.commons.math3.exception.MathIllegalArgumentException; 022 import org.apache.commons.math3.exception.MaxCountExceededException; 023 import org.apache.commons.math3.exception.NoDataException; 024 import org.apache.commons.math3.exception.NotStrictlyPositiveException; 025 import org.apache.commons.math3.exception.NullArgumentException; 026 import org.apache.commons.math3.exception.NumberIsTooSmallException; 027 import org.apache.commons.math3.exception.OutOfRangeException; 028 import org.apache.commons.math3.exception.util.LocalizedFormats; 029 import org.apache.commons.math3.stat.StatUtils; 030 import org.apache.commons.math3.stat.descriptive.StatisticalSummary; 031 import org.apache.commons.math3.util.FastMath; 032 033 /** 034 * An implementation for Student's t-tests. 
* <p>
 * Tests can be:<ul>
 * <li>One-sample or two-sample</li>
 * <li>One-sided or two-sided</li>
 * <li>Paired or unpaired (for two-sample tests)</li>
 * <li>Homoscedastic (equal variance assumption) or heteroscedastic
 * (for two sample tests)</li>
 * <li>Fixed significance level (boolean-valued) or returning p-values.
 * </li></ul></p>
 * <p>
 * Test statistics are available for all tests. Methods including "Test" in
 * their names perform tests, all other methods return t-statistics. Among
 * the "Test" methods, <code>double-</code>valued methods return p-values;
 * <code>boolean-</code>valued methods perform fixed significance level tests.
 * Significance levels are always specified as numbers between 0 and 0.5
 * (e.g. tests at the 95% level use <code>alpha=0.05</code>).</p>
 * <p>
 * Input to tests can be either <code>double[]</code> arrays or
 * {@link StatisticalSummary} instances.</p><p>
 * Uses commons-math {@link org.apache.commons.math3.distribution.TDistribution}
 * implementation to estimate exact p-values.</p>
 *
 * @version $Id: TTest.java 1416643 2012-12-03 19:37:14Z tn $
 */
public class TTest {
/**
 * Computes a paired, 2-sample t-statistic based on the data in the input
 * arrays. The t-statistic returned is equivalent to what would be returned by
 * computing the one-sample t-statistic {@link #t(double, double[])}, with
 * <code>mu = 0</code> and the sample array consisting of the (signed)
 * differences between corresponding entries in <code>sample1</code> and
 * <code>sample2.</code>
 * <p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The input arrays must have the same length and their common length
 * must be at least 2.
071 * </li></ul></p> 072 * 073 * @param sample1 array of sample data values 074 * @param sample2 array of sample data values 075 * @return t statistic 076 * @throws NullArgumentException if the arrays are <code>null</code> 077 * @throws NoDataException if the arrays are empty 078 * @throws DimensionMismatchException if the length of the arrays is not equal 079 * @throws NumberIsTooSmallException if the length of the arrays is < 2 080 */ 081 public double pairedT(final double[] sample1, final double[] sample2) 082 throws NullArgumentException, NoDataException, 083 DimensionMismatchException, NumberIsTooSmallException { 084 085 checkSampleData(sample1); 086 checkSampleData(sample2); 087 double meanDifference = StatUtils.meanDifference(sample1, sample2); 088 return t(meanDifference, 0, 089 StatUtils.varianceDifference(sample1, sample2, meanDifference), 090 sample1.length); 091 092 } 093 094 /** 095 * Returns the <i>observed significance level</i>, or 096 * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test 097 * based on the data in the input arrays. 098 * <p> 099 * The number returned is the smallest significance level 100 * at which one can reject the null hypothesis that the mean of the paired 101 * differences is 0 in favor of the two-sided alternative that the mean paired 102 * difference is not equal to 0. 
For a one-sided test, divide the returned 103 * value by 2.</p> 104 * <p> 105 * This test is equivalent to a one-sample t-test computed using 106 * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample 107 * array consisting of the signed differences between corresponding elements of 108 * <code>sample1</code> and <code>sample2.</code></p> 109 * <p> 110 * <strong>Usage Note:</strong><br> 111 * The validity of the p-value depends on the assumptions of the parametric 112 * t-test procedure, as discussed 113 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 114 * here</a></p> 115 * <p> 116 * <strong>Preconditions</strong>: <ul> 117 * <li>The input array lengths must be the same and their common length must 118 * be at least 2. 119 * </li></ul></p> 120 * 121 * @param sample1 array of sample data values 122 * @param sample2 array of sample data values 123 * @return p-value for t-test 124 * @throws NullArgumentException if the arrays are <code>null</code> 125 * @throws NoDataException if the arrays are empty 126 * @throws DimensionMismatchException if the length of the arrays is not equal 127 * @throws NumberIsTooSmallException if the length of the arrays is < 2 128 * @throws MaxCountExceededException if an error occurs computing the p-value 129 */ 130 public double pairedTTest(final double[] sample1, final double[] sample2) 131 throws NullArgumentException, NoDataException, DimensionMismatchException, 132 NumberIsTooSmallException, MaxCountExceededException { 133 134 double meanDifference = StatUtils.meanDifference(sample1, sample2); 135 return tTest(meanDifference, 0, 136 StatUtils.varianceDifference(sample1, sample2, meanDifference), 137 sample1.length); 138 139 } 140 141 /** 142 * Performs a paired t-test evaluating the null hypothesis that the 143 * mean of the paired differences between <code>sample1</code> and 144 * <code>sample2</code> is 0 in favor of the two-sided alternative that the 145 * mean paired 
difference is not equal to 0, with significance level 146 * <code>alpha</code>. 147 * <p> 148 * Returns <code>true</code> iff the null hypothesis can be rejected with 149 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use 150 * <code>alpha * 2</code></p> 151 * <p> 152 * <strong>Usage Note:</strong><br> 153 * The validity of the test depends on the assumptions of the parametric 154 * t-test procedure, as discussed 155 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 156 * here</a></p> 157 * <p> 158 * <strong>Preconditions</strong>: <ul> 159 * <li>The input array lengths must be the same and their common length 160 * must be at least 2. 161 * </li> 162 * <li> <code> 0 < alpha < 0.5 </code> 163 * </li></ul></p> 164 * 165 * @param sample1 array of sample data values 166 * @param sample2 array of sample data values 167 * @param alpha significance level of the test 168 * @return true if the null hypothesis can be rejected with 169 * confidence 1 - alpha 170 * @throws NullArgumentException if the arrays are <code>null</code> 171 * @throws NoDataException if the arrays are empty 172 * @throws DimensionMismatchException if the length of the arrays is not equal 173 * @throws NumberIsTooSmallException if the length of the arrays is < 2 174 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 175 * @throws MaxCountExceededException if an error occurs computing the p-value 176 */ 177 public boolean pairedTTest(final double[] sample1, final double[] sample2, 178 final double alpha) 179 throws NullArgumentException, NoDataException, DimensionMismatchException, 180 NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException { 181 182 checkSignificanceLevel(alpha); 183 return pairedTTest(sample1, sample2) < alpha; 184 185 } 186 187 /** 188 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula"> 189 * t statistic </a> given observed values and a 
comparison constant. 190 * <p> 191 * This statistic can be used to perform a one sample t-test for the mean. 192 * </p><p> 193 * <strong>Preconditions</strong>: <ul> 194 * <li>The observed array length must be at least 2. 195 * </li></ul></p> 196 * 197 * @param mu comparison constant 198 * @param observed array of values 199 * @return t statistic 200 * @throws NullArgumentException if <code>observed</code> is <code>null</code> 201 * @throws NumberIsTooSmallException if the length of <code>observed</code> is < 2 202 */ 203 public double t(final double mu, final double[] observed) 204 throws NullArgumentException, NumberIsTooSmallException { 205 206 checkSampleData(observed); 207 // No try-catch or advertised exception because args have just been checked 208 return t(StatUtils.mean(observed), mu, StatUtils.variance(observed), 209 observed.length); 210 211 } 212 213 /** 214 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula"> 215 * t statistic </a> to use in comparing the mean of the dataset described by 216 * <code>sampleStats</code> to <code>mu</code>. 217 * <p> 218 * This statistic can be used to perform a one sample t-test for the mean. 219 * </p><p> 220 * <strong>Preconditions</strong>: <ul> 221 * <li><code>observed.getN() ≥ 2</code>. 
222 * </li></ul></p> 223 * 224 * @param mu comparison constant 225 * @param sampleStats DescriptiveStatistics holding sample summary statitstics 226 * @return t statistic 227 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code> 228 * @throws NumberIsTooSmallException if the number of samples is < 2 229 */ 230 public double t(final double mu, final StatisticalSummary sampleStats) 231 throws NullArgumentException, NumberIsTooSmallException { 232 233 checkSampleData(sampleStats); 234 return t(sampleStats.getMean(), mu, sampleStats.getVariance(), 235 sampleStats.getN()); 236 237 } 238 239 /** 240 * Computes a 2-sample t statistic, under the hypothesis of equal 241 * subpopulation variances. To compute a t-statistic without the 242 * equal variances hypothesis, use {@link #t(double[], double[])}. 243 * <p> 244 * This statistic can be used to perform a (homoscedastic) two-sample 245 * t-test to compare sample means.</p> 246 * <p> 247 * The t-statistic is</p> 248 * <p> 249 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code> 250 * </p><p> 251 * where <strong><code>n1</code></strong> is the size of first sample; 252 * <strong><code> n2</code></strong> is the size of second sample; 253 * <strong><code> m1</code></strong> is the mean of first sample; 254 * <strong><code> m2</code></strong> is the mean of second sample</li> 255 * </ul> 256 * and <strong><code>var</code></strong> is the pooled variance estimate: 257 * </p><p> 258 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code> 259 * </p><p> 260 * with <strong><code>var1</code></strong> the variance of the first sample and 261 * <strong><code>var2</code></strong> the variance of the second sample. 262 * </p><p> 263 * <strong>Preconditions</strong>: <ul> 264 * <li>The observed array lengths must both be at least 2. 
265 * </li></ul></p> 266 * 267 * @param sample1 array of sample data values 268 * @param sample2 array of sample data values 269 * @return t statistic 270 * @throws NullArgumentException if the arrays are <code>null</code> 271 * @throws NumberIsTooSmallException if the length of the arrays is < 2 272 */ 273 public double homoscedasticT(final double[] sample1, final double[] sample2) 274 throws NullArgumentException, NumberIsTooSmallException { 275 276 checkSampleData(sample1); 277 checkSampleData(sample2); 278 // No try-catch or advertised exception because args have just been checked 279 return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2), 280 StatUtils.variance(sample1), StatUtils.variance(sample2), 281 sample1.length, sample2.length); 282 283 } 284 285 /** 286 * Computes a 2-sample t statistic, without the hypothesis of equal 287 * subpopulation variances. To compute a t-statistic assuming equal 288 * variances, use {@link #homoscedasticT(double[], double[])}. 289 * <p> 290 * This statistic can be used to perform a two-sample t-test to compare 291 * sample means.</p> 292 * <p> 293 * The t-statistic is</p> 294 * <p> 295 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code> 296 * </p><p> 297 * where <strong><code>n1</code></strong> is the size of the first sample 298 * <strong><code> n2</code></strong> is the size of the second sample; 299 * <strong><code> m1</code></strong> is the mean of the first sample; 300 * <strong><code> m2</code></strong> is the mean of the second sample; 301 * <strong><code> var1</code></strong> is the variance of the first sample; 302 * <strong><code> var2</code></strong> is the variance of the second sample; 303 * </p><p> 304 * <strong>Preconditions</strong>: <ul> 305 * <li>The observed array lengths must both be at least 2. 
306 * </li></ul></p> 307 * 308 * @param sample1 array of sample data values 309 * @param sample2 array of sample data values 310 * @return t statistic 311 * @throws NullArgumentException if the arrays are <code>null</code> 312 * @throws NumberIsTooSmallException if the length of the arrays is < 2 313 */ 314 public double t(final double[] sample1, final double[] sample2) 315 throws NullArgumentException, NumberIsTooSmallException { 316 317 checkSampleData(sample1); 318 checkSampleData(sample2); 319 // No try-catch or advertised exception because args have just been checked 320 return t(StatUtils.mean(sample1), StatUtils.mean(sample2), 321 StatUtils.variance(sample1), StatUtils.variance(sample2), 322 sample1.length, sample2.length); 323 324 } 325 326 /** 327 * Computes a 2-sample t statistic </a>, comparing the means of the datasets 328 * described by two {@link StatisticalSummary} instances, without the 329 * assumption of equal subpopulation variances. Use 330 * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to 331 * compute a t-statistic under the equal variances assumption. 332 * <p> 333 * This statistic can be used to perform a two-sample t-test to compare 334 * sample means.</p> 335 * <p> 336 * The returned t-statistic is</p> 337 * <p> 338 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code> 339 * </p><p> 340 * where <strong><code>n1</code></strong> is the size of the first sample; 341 * <strong><code> n2</code></strong> is the size of the second sample; 342 * <strong><code> m1</code></strong> is the mean of the first sample; 343 * <strong><code> m2</code></strong> is the mean of the second sample 344 * <strong><code> var1</code></strong> is the variance of the first sample; 345 * <strong><code> var2</code></strong> is the variance of the second sample 346 * </p><p> 347 * <strong>Preconditions</strong>: <ul> 348 * <li>The datasets described by the two Univariates must each contain 349 * at least 2 observations. 
350 * </li></ul></p> 351 * 352 * @param sampleStats1 StatisticalSummary describing data from the first sample 353 * @param sampleStats2 StatisticalSummary describing data from the second sample 354 * @return t statistic 355 * @throws NullArgumentException if the sample statistics are <code>null</code> 356 * @throws NumberIsTooSmallException if the number of samples is < 2 357 */ 358 public double t(final StatisticalSummary sampleStats1, 359 final StatisticalSummary sampleStats2) 360 throws NullArgumentException, NumberIsTooSmallException { 361 362 checkSampleData(sampleStats1); 363 checkSampleData(sampleStats2); 364 return t(sampleStats1.getMean(), sampleStats2.getMean(), 365 sampleStats1.getVariance(), sampleStats2.getVariance(), 366 sampleStats1.getN(), sampleStats2.getN()); 367 368 } 369 370 /** 371 * Computes a 2-sample t statistic, comparing the means of the datasets 372 * described by two {@link StatisticalSummary} instances, under the 373 * assumption of equal subpopulation variances. To compute a t-statistic 374 * without the equal variances assumption, use 375 * {@link #t(StatisticalSummary, StatisticalSummary)}. 
* <p>
 * This statistic can be used to perform a (homoscedastic) two-sample
 * t-test to compare sample means.</p>
 * <p>
 * The t-statistic returned is</p>
 * <p>
 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
 * </p><p>
 * where <strong><code>n1</code></strong> is the size of first sample;
 * <strong><code> n2</code></strong> is the size of second sample;
 * <strong><code> m1</code></strong> is the mean of first sample;
 * <strong><code> m2</code></strong> is the mean of second sample
 * and <strong><code>var</code></strong> is the pooled variance estimate:
 * </p><p>
 * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
 * </p><p>
 * with <strong><code>var1</code></strong> the variance of the first sample and
 * <strong><code>var2</code></strong> the variance of the second sample.
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
398 * </li></ul></p> 399 * 400 * @param sampleStats1 StatisticalSummary describing data from the first sample 401 * @param sampleStats2 StatisticalSummary describing data from the second sample 402 * @return t statistic 403 * @throws NullArgumentException if the sample statistics are <code>null</code> 404 * @throws NumberIsTooSmallException if the number of samples is < 2 405 */ 406 public double homoscedasticT(final StatisticalSummary sampleStats1, 407 final StatisticalSummary sampleStats2) 408 throws NullArgumentException, NumberIsTooSmallException { 409 410 checkSampleData(sampleStats1); 411 checkSampleData(sampleStats2); 412 return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(), 413 sampleStats1.getVariance(), sampleStats2.getVariance(), 414 sampleStats1.getN(), sampleStats2.getN()); 415 416 } 417 418 /** 419 * Returns the <i>observed significance level</i>, or 420 * <i>p-value</i>, associated with a one-sample, two-tailed t-test 421 * comparing the mean of the input array with the constant <code>mu</code>. 422 * <p> 423 * The number returned is the smallest significance level 424 * at which one can reject the null hypothesis that the mean equals 425 * <code>mu</code> in favor of the two-sided alternative that the mean 426 * is different from <code>mu</code>. For a one-sided test, divide the 427 * returned value by 2.</p> 428 * <p> 429 * <strong>Usage Note:</strong><br> 430 * The validity of the test depends on the assumptions of the parametric 431 * t-test procedure, as discussed 432 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a> 433 * </p><p> 434 * <strong>Preconditions</strong>: <ul> 435 * <li>The observed array length must be at least 2. 
436 * </li></ul></p> 437 * 438 * @param mu constant value to compare sample mean against 439 * @param sample array of sample data values 440 * @return p-value 441 * @throws NullArgumentException if the sample array is <code>null</code> 442 * @throws NumberIsTooSmallException if the length of the array is < 2 443 * @throws MaxCountExceededException if an error occurs computing the p-value 444 */ 445 public double tTest(final double mu, final double[] sample) 446 throws NullArgumentException, NumberIsTooSmallException, 447 MaxCountExceededException { 448 449 checkSampleData(sample); 450 // No try-catch or advertised exception because args have just been checked 451 return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample), 452 sample.length); 453 454 } 455 456 /** 457 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 458 * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from 459 * which <code>sample</code> is drawn equals <code>mu</code>. 460 * <p> 461 * Returns <code>true</code> iff the null hypothesis can be 462 * rejected with confidence <code>1 - alpha</code>. 
To 463 * perform a 1-sided test, use <code>alpha * 2</code></p> 464 * <p> 465 * <strong>Examples:</strong><br><ol> 466 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at 467 * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code> 468 * </li> 469 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code> 470 * at the 99% level, first verify that the measured sample mean is less 471 * than <code>mu</code> and then use 472 * <br><code>tTest(mu, sample, 0.02) </code> 473 * </li></ol></p> 474 * <p> 475 * <strong>Usage Note:</strong><br> 476 * The validity of the test depends on the assumptions of the one-sample 477 * parametric t-test procedure, as discussed 478 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a> 479 * </p><p> 480 * <strong>Preconditions</strong>: <ul> 481 * <li>The observed array length must be at least 2. 482 * </li></ul></p> 483 * 484 * @param mu constant value to compare sample mean against 485 * @param sample array of sample data values 486 * @param alpha significance level of the test 487 * @return p-value 488 * @throws NullArgumentException if the sample array is <code>null</code> 489 * @throws NumberIsTooSmallException if the length of the array is < 2 490 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 491 * @throws MaxCountExceededException if an error computing the p-value 492 */ 493 public boolean tTest(final double mu, final double[] sample, final double alpha) 494 throws NullArgumentException, NumberIsTooSmallException, 495 OutOfRangeException, MaxCountExceededException { 496 497 checkSignificanceLevel(alpha); 498 return tTest(mu, sample) < alpha; 499 500 } 501 502 /** 503 * Returns the <i>observed significance level</i>, or 504 * <i>p-value</i>, associated with a one-sample, two-tailed t-test 505 * comparing the mean of the dataset described by <code>sampleStats</code> 506 * with the constant <code>mu</code>. 
507 * <p> 508 * The number returned is the smallest significance level 509 * at which one can reject the null hypothesis that the mean equals 510 * <code>mu</code> in favor of the two-sided alternative that the mean 511 * is different from <code>mu</code>. For a one-sided test, divide the 512 * returned value by 2.</p> 513 * <p> 514 * <strong>Usage Note:</strong><br> 515 * The validity of the test depends on the assumptions of the parametric 516 * t-test procedure, as discussed 517 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 518 * here</a></p> 519 * <p> 520 * <strong>Preconditions</strong>: <ul> 521 * <li>The sample must contain at least 2 observations. 522 * </li></ul></p> 523 * 524 * @param mu constant value to compare sample mean against 525 * @param sampleStats StatisticalSummary describing sample data 526 * @return p-value 527 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code> 528 * @throws NumberIsTooSmallException if the number of samples is < 2 529 * @throws MaxCountExceededException if an error occurs computing the p-value 530 */ 531 public double tTest(final double mu, final StatisticalSummary sampleStats) 532 throws NullArgumentException, NumberIsTooSmallException, 533 MaxCountExceededException { 534 535 checkSampleData(sampleStats); 536 return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), 537 sampleStats.getN()); 538 539 } 540 541 /** 542 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 543 * two-sided t-test</a> evaluating the null hypothesis that the mean of the 544 * population from which the dataset described by <code>stats</code> is 545 * drawn equals <code>mu</code>. 546 * <p> 547 * Returns <code>true</code> iff the null hypothesis can be rejected with 548 * confidence <code>1 - alpha</code>. 
To perform a 1-sided test, use 549 * <code>alpha * 2.</code></p> 550 * <p> 551 * <strong>Examples:</strong><br><ol> 552 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at 553 * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code> 554 * </li> 555 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code> 556 * at the 99% level, first verify that the measured sample mean is less 557 * than <code>mu</code> and then use 558 * <br><code>tTest(mu, sampleStats, 0.02) </code> 559 * </li></ol></p> 560 * <p> 561 * <strong>Usage Note:</strong><br> 562 * The validity of the test depends on the assumptions of the one-sample 563 * parametric t-test procedure, as discussed 564 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a> 565 * </p><p> 566 * <strong>Preconditions</strong>: <ul> 567 * <li>The sample must include at least 2 observations. 568 * </li></ul></p> 569 * 570 * @param mu constant value to compare sample mean against 571 * @param sampleStats StatisticalSummary describing sample data values 572 * @param alpha significance level of the test 573 * @return p-value 574 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code> 575 * @throws NumberIsTooSmallException if the number of samples is < 2 576 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 577 * @throws MaxCountExceededException if an error occurs computing the p-value 578 */ 579 public boolean tTest(final double mu, final StatisticalSummary sampleStats, 580 final double alpha) 581 throws NullArgumentException, NumberIsTooSmallException, 582 OutOfRangeException, MaxCountExceededException { 583 584 checkSignificanceLevel(alpha); 585 return tTest(mu, sampleStats) < alpha; 586 587 } 588 589 /** 590 * Returns the <i>observed significance level</i>, or 591 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 592 * comparing the means of the input arrays. 
* <p>
 * The number returned is the smallest significance level
 * at which one can reject the null hypothesis that the two means are
 * equal in favor of the two-sided alternative that they are different.
 * For a one-sided test, divide the returned value by 2.</p>
 * <p>
 * The test does not assume that the underlying population variances are
 * equal and it uses approximated degrees of freedom computed from the
 * sample data to compute the p-value. The t-statistic used is as defined in
 * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
 * to the degrees of freedom is used,
 * as described
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
 * here.</a> To perform the test under the assumption of equal subpopulation
 * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The validity of the p-value depends on the assumptions of the parametric
 * t-test procedure, as discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 * here</a></p>
 * <p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The observed array lengths must both be at least 2.
617 * </li></ul></p> 618 * 619 * @param sample1 array of sample data values 620 * @param sample2 array of sample data values 621 * @return p-value for t-test 622 * @throws NullArgumentException if the arrays are <code>null</code> 623 * @throws NumberIsTooSmallException if the length of the arrays is < 2 624 * @throws MaxCountExceededException if an error occurs computing the p-value 625 */ 626 public double tTest(final double[] sample1, final double[] sample2) 627 throws NullArgumentException, NumberIsTooSmallException, 628 MaxCountExceededException { 629 630 checkSampleData(sample1); 631 checkSampleData(sample2); 632 // No try-catch or advertised exception because args have just been checked 633 return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2), 634 StatUtils.variance(sample1), StatUtils.variance(sample2), 635 sample1.length, sample2.length); 636 637 } 638 639 /** 640 * Returns the <i>observed significance level</i>, or 641 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 642 * comparing the means of the input arrays, under the assumption that 643 * the two samples are drawn from subpopulations with equal variances. 644 * To perform the test without the equal variances assumption, use 645 * {@link #tTest(double[], double[])}.</p> 646 * <p> 647 * The number returned is the smallest significance level 648 * at which one can reject the null hypothesis that the two means are 649 * equal in favor of the two-sided alternative that they are different. 650 * For a one-sided test, divide the returned value by 2.</p> 651 * <p> 652 * A pooled variance estimate is used to compute the t-statistic. See 653 * {@link #homoscedasticT(double[], double[])}. 
The sum of the sample sizes 654 * minus 2 is used as the degrees of freedom.</p> 655 * <p> 656 * <strong>Usage Note:</strong><br> 657 * The validity of the p-value depends on the assumptions of the parametric 658 * t-test procedure, as discussed 659 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 660 * here</a></p> 661 * <p> 662 * <strong>Preconditions</strong>: <ul> 663 * <li>The observed array lengths must both be at least 2. 664 * </li></ul></p> 665 * 666 * @param sample1 array of sample data values 667 * @param sample2 array of sample data values 668 * @return p-value for t-test 669 * @throws NullArgumentException if the arrays are <code>null</code> 670 * @throws NumberIsTooSmallException if the length of the arrays is < 2 671 * @throws MaxCountExceededException if an error occurs computing the p-value 672 */ 673 public double homoscedasticTTest(final double[] sample1, final double[] sample2) 674 throws NullArgumentException, NumberIsTooSmallException, 675 MaxCountExceededException { 676 677 checkSampleData(sample1); 678 checkSampleData(sample2); 679 // No try-catch or advertised exception because args have just been checked 680 return homoscedasticTTest(StatUtils.mean(sample1), 681 StatUtils.mean(sample2), 682 StatUtils.variance(sample1), 683 StatUtils.variance(sample2), 684 sample1.length, sample2.length); 685 686 } 687 688 /** 689 * Performs a 690 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 691 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> 692 * and <code>sample2</code> are drawn from populations with the same mean, 693 * with significance level <code>alpha</code>. This test does not assume 694 * that the subpopulation variances are equal. To perform the test assuming 695 * equal variances, use 696 * {@link #homoscedasticTTest(double[], double[], double)}. 
697 * <p> 698 * Returns <code>true</code> iff the null hypothesis that the means are 699 * equal can be rejected with confidence <code>1 - alpha</code>. To 700 * perform a 1-sided test, use <code>alpha * 2</code></p> 701 * <p> 702 * See {@link #t(double[], double[])} for the formula used to compute the 703 * t-statistic. Degrees of freedom are approximated using the 704 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> 705 * Welch-Satterthwaite approximation.</a></p> 706 * <p> 707 * <strong>Examples:</strong><br><ol> 708 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at 709 * the 95% level, use 710 * <br><code>tTest(sample1, sample2, 0.05). </code> 711 * </li> 712 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>, 713 * at the 99% level, first verify that the measured mean of <code>sample 1</code> 714 * is less than the mean of <code>sample 2</code> and then use 715 * <br><code>tTest(sample1, sample2, 0.02) </code> 716 * </li></ol></p> 717 * <p> 718 * <strong>Usage Note:</strong><br> 719 * The validity of the test depends on the assumptions of the parametric 720 * t-test procedure, as discussed 721 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 722 * here</a></p> 723 * <p> 724 * <strong>Preconditions</strong>: <ul> 725 * <li>The observed array lengths must both be at least 2. 
726 * </li> 727 * <li> <code> 0 < alpha < 0.5 </code> 728 * </li></ul></p> 729 * 730 * @param sample1 array of sample data values 731 * @param sample2 array of sample data values 732 * @param alpha significance level of the test 733 * @return true if the null hypothesis can be rejected with 734 * confidence 1 - alpha 735 * @throws NullArgumentException if the arrays are <code>null</code> 736 * @throws NumberIsTooSmallException if the length of the arrays is < 2 737 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 738 * @throws MaxCountExceededException if an error occurs computing the p-value 739 */ 740 public boolean tTest(final double[] sample1, final double[] sample2, 741 final double alpha) 742 throws NullArgumentException, NumberIsTooSmallException, 743 OutOfRangeException, MaxCountExceededException { 744 745 checkSignificanceLevel(alpha); 746 return tTest(sample1, sample2) < alpha; 747 748 } 749 750 /** 751 * Performs a 752 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 753 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> 754 * and <code>sample2</code> are drawn from populations with the same mean, 755 * with significance level <code>alpha</code>, assuming that the 756 * subpopulation variances are equal. Use 757 * {@link #tTest(double[], double[], double)} to perform the test without 758 * the assumption of equal variances. 759 * <p> 760 * Returns <code>true</code> iff the null hypothesis that the means are 761 * equal can be rejected with confidence <code>1 - alpha</code>. To 762 * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test 763 * without the assumption of equal subpopulation variances, use 764 * {@link #tTest(double[], double[], double)}.</p> 765 * <p> 766 * A pooled variance estimate is used to compute the t-statistic. See 767 * {@link #t(double[], double[])} for the formula. 
The sum of the sample 768 * sizes minus 2 is used as the degrees of freedom.</p> 769 * <p> 770 * <strong>Examples:</strong><br><ol> 771 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at 772 * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code> 773 * </li> 774 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code> 775 * at the 99% level, first verify that the measured mean of 776 * <code>sample 1</code> is less than the mean of <code>sample 2</code> 777 * and then use 778 * <br><code>tTest(sample1, sample2, 0.02) </code> 779 * </li></ol></p> 780 * <p> 781 * <strong>Usage Note:</strong><br> 782 * The validity of the test depends on the assumptions of the parametric 783 * t-test procedure, as discussed 784 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 785 * here</a></p> 786 * <p> 787 * <strong>Preconditions</strong>: <ul> 788 * <li>The observed array lengths must both be at least 2. 789 * </li> 790 * <li> <code> 0 < alpha < 0.5 </code> 791 * </li></ul></p> 792 * 793 * @param sample1 array of sample data values 794 * @param sample2 array of sample data values 795 * @param alpha significance level of the test 796 * @return true if the null hypothesis can be rejected with 797 * confidence 1 - alpha 798 * @throws NullArgumentException if the arrays are <code>null</code> 799 * @throws NumberIsTooSmallException if the length of the arrays is < 2 800 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 801 * @throws MaxCountExceededException if an error occurs computing the p-value 802 */ 803 public boolean homoscedasticTTest(final double[] sample1, final double[] sample2, 804 final double alpha) 805 throws NullArgumentException, NumberIsTooSmallException, 806 OutOfRangeException, MaxCountExceededException { 807 808 checkSignificanceLevel(alpha); 809 return homoscedasticTTest(sample1, sample2) < alpha; 810 811 } 812 813 /** 814 * Returns the <i>observed 
significance level</i>, or
     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
     * comparing the means of the datasets described by two StatisticalSummary
     * instances.
     * <p>
     * The number returned is the smallest significance level
     * at which one can reject the null hypothesis that the two means are
     * equal in favor of the two-sided alternative that they are different.
     * For a one-sided test, divide the returned value by 2.</p>
     * <p>
     * The test does not assume that the underlying population variances are
     * equal  and it uses approximated degrees of freedom computed from the
     * sample data to compute the p-value.   To perform the test assuming
     * equal variances, use
     * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
     * <p>
     * <strong>Usage Note:</strong><br>
     * The validity of the p-value depends on the assumptions of the parametric
     * t-test procedure, as discussed
     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
     * here</a></p>
     * <p>
     * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two StatisticalSummary instances must
     * each contain at least 2 observations.
839 * </li></ul></p> 840 * 841 * @param sampleStats1 StatisticalSummary describing data from the first sample 842 * @param sampleStats2 StatisticalSummary describing data from the second sample 843 * @return p-value for t-test 844 * @throws NullArgumentException if the sample statistics are <code>null</code> 845 * @throws NumberIsTooSmallException if the number of samples is < 2 846 * @throws MaxCountExceededException if an error occurs computing the p-value 847 */ 848 public double tTest(final StatisticalSummary sampleStats1, 849 final StatisticalSummary sampleStats2) 850 throws NullArgumentException, NumberIsTooSmallException, 851 MaxCountExceededException { 852 853 checkSampleData(sampleStats1); 854 checkSampleData(sampleStats2); 855 return tTest(sampleStats1.getMean(), sampleStats2.getMean(), 856 sampleStats1.getVariance(), sampleStats2.getVariance(), 857 sampleStats1.getN(), sampleStats2.getN()); 858 859 } 860 861 /** 862 * Returns the <i>observed significance level</i>, or 863 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 864 * comparing the means of the datasets described by two StatisticalSummary 865 * instances, under the hypothesis of equal subpopulation variances. To 866 * perform a test without the equal variances assumption, use 867 * {@link #tTest(StatisticalSummary, StatisticalSummary)}. 868 * <p> 869 * The number returned is the smallest significance level 870 * at which one can reject the null hypothesis that the two means are 871 * equal in favor of the two-sided alternative that they are different. 872 * For a one-sided test, divide the returned value by 2.</p> 873 * <p> 874 * See {@link #homoscedasticT(double[], double[])} for the formula used to 875 * compute the t-statistic. 
The sum of the sample sizes minus 2 is used as 876 * the degrees of freedom.</p> 877 * <p> 878 * <strong>Usage Note:</strong><br> 879 * The validity of the p-value depends on the assumptions of the parametric 880 * t-test procedure, as discussed 881 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a> 882 * </p><p> 883 * <strong>Preconditions</strong>: <ul> 884 * <li>The datasets described by the two Univariates must each contain 885 * at least 2 observations. 886 * </li></ul></p> 887 * 888 * @param sampleStats1 StatisticalSummary describing data from the first sample 889 * @param sampleStats2 StatisticalSummary describing data from the second sample 890 * @return p-value for t-test 891 * @throws NullArgumentException if the sample statistics are <code>null</code> 892 * @throws NumberIsTooSmallException if the number of samples is < 2 893 * @throws MaxCountExceededException if an error occurs computing the p-value 894 */ 895 public double homoscedasticTTest(final StatisticalSummary sampleStats1, 896 final StatisticalSummary sampleStats2) 897 throws NullArgumentException, NumberIsTooSmallException, 898 MaxCountExceededException { 899 900 checkSampleData(sampleStats1); 901 checkSampleData(sampleStats2); 902 return homoscedasticTTest(sampleStats1.getMean(), 903 sampleStats2.getMean(), 904 sampleStats1.getVariance(), 905 sampleStats2.getVariance(), 906 sampleStats1.getN(), sampleStats2.getN()); 907 908 } 909 910 /** 911 * Performs a 912 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 913 * two-sided t-test</a> evaluating the null hypothesis that 914 * <code>sampleStats1</code> and <code>sampleStats2</code> describe 915 * datasets drawn from populations with the same mean, with significance 916 * level <code>alpha</code>. This test does not assume that the 917 * subpopulation variances are equal. 
To perform the test under the equal
     * variances assumption, use
     * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
     * <p>
     * Returns <code>true</code> iff the null hypothesis that the means are
     * equal can be rejected with confidence <code>1 - alpha</code>.  To
     * perform a 1-sided test, use <code>alpha * 2</code>.</p>
     * <p>
     * See {@link #t(double[], double[])} for the formula used to compute the
     * t-statistic.  Degrees of freedom are approximated using the
     * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
     * Welch-Satterthwaite approximation.</a></p>
     * <p>
     * <strong>Examples:</strong><br><ol>
     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
     * the 95% level, use
     * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
     * </li>
     * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
     * at the 99% level,  first verify that the measured mean of
     * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
     * and then use
     * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
     * </li></ol></p>
     * <p>
     * <strong>Usage Note:</strong><br>
     * The validity of the test depends on the assumptions of the parametric
     * t-test procedure, as discussed
     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
     * here</a></p>
     * <p>
     * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two StatisticalSummary instances must
     * each contain at least 2 observations.
951 * </li> 952 * <li> <code> 0 < alpha < 0.5 </code> 953 * </li></ul></p> 954 * 955 * @param sampleStats1 StatisticalSummary describing sample data values 956 * @param sampleStats2 StatisticalSummary describing sample data values 957 * @param alpha significance level of the test 958 * @return true if the null hypothesis can be rejected with 959 * confidence 1 - alpha 960 * @throws NullArgumentException if the sample statistics are <code>null</code> 961 * @throws NumberIsTooSmallException if the number of samples is < 2 962 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 963 * @throws MaxCountExceededException if an error occurs computing the p-value 964 */ 965 public boolean tTest(final StatisticalSummary sampleStats1, 966 final StatisticalSummary sampleStats2, 967 final double alpha) 968 throws NullArgumentException, NumberIsTooSmallException, 969 OutOfRangeException, MaxCountExceededException { 970 971 checkSignificanceLevel(alpha); 972 return tTest(sampleStats1, sampleStats2) < alpha; 973 974 } 975 976 //----------------------------------------------- Protected methods 977 978 /** 979 * Computes approximate degrees of freedom for 2-sample t-test. 980 * 981 * @param v1 first sample variance 982 * @param v2 second sample variance 983 * @param n1 first sample n 984 * @param n2 second sample n 985 * @return approximate degrees of freedom 986 */ 987 protected double df(double v1, double v2, double n1, double n2) { 988 return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) / 989 ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) / 990 (n2 * n2 * (n2 - 1d))); 991 } 992 993 /** 994 * Computes t test statistic for 1-sample t-test. 
995 * 996 * @param m sample mean 997 * @param mu constant to test against 998 * @param v sample variance 999 * @param n sample n 1000 * @return t test statistic 1001 */ 1002 protected double t(final double m, final double mu, 1003 final double v, final double n) { 1004 return (m - mu) / FastMath.sqrt(v / n); 1005 } 1006 1007 /** 1008 * Computes t test statistic for 2-sample t-test. 1009 * <p> 1010 * Does not assume that subpopulation variances are equal.</p> 1011 * 1012 * @param m1 first sample mean 1013 * @param m2 second sample mean 1014 * @param v1 first sample variance 1015 * @param v2 second sample variance 1016 * @param n1 first sample n 1017 * @param n2 second sample n 1018 * @return t test statistic 1019 */ 1020 protected double t(final double m1, final double m2, 1021 final double v1, final double v2, 1022 final double n1, final double n2) { 1023 return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2)); 1024 } 1025 1026 /** 1027 * Computes t test statistic for 2-sample t-test under the hypothesis 1028 * of equal subpopulation variances. 1029 * 1030 * @param m1 first sample mean 1031 * @param m2 second sample mean 1032 * @param v1 first sample variance 1033 * @param v2 second sample variance 1034 * @param n1 first sample n 1035 * @param n2 second sample n 1036 * @return t test statistic 1037 */ 1038 protected double homoscedasticT(final double m1, final double m2, 1039 final double v1, final double v2, 1040 final double n1, final double n2) { 1041 final double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2); 1042 return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2)); 1043 } 1044 1045 /** 1046 * Computes p-value for 2-sided, 1-sample t-test. 
1047 * 1048 * @param m sample mean 1049 * @param mu constant to test against 1050 * @param v sample variance 1051 * @param n sample n 1052 * @return p-value 1053 * @throws MaxCountExceededException if an error occurs computing the p-value 1054 * @throws MathIllegalArgumentException if n is not greater than 1 1055 */ 1056 protected double tTest(final double m, final double mu, 1057 final double v, final double n) 1058 throws MaxCountExceededException, MathIllegalArgumentException { 1059 1060 double t = FastMath.abs(t(m, mu, v, n)); 1061 TDistribution distribution = new TDistribution(n - 1); 1062 return 2.0 * distribution.cumulativeProbability(-t); 1063 1064 } 1065 1066 /** 1067 * Computes p-value for 2-sided, 2-sample t-test. 1068 * <p> 1069 * Does not assume subpopulation variances are equal. Degrees of freedom 1070 * are estimated from the data.</p> 1071 * 1072 * @param m1 first sample mean 1073 * @param m2 second sample mean 1074 * @param v1 first sample variance 1075 * @param v2 second sample variance 1076 * @param n1 first sample n 1077 * @param n2 second sample n 1078 * @return p-value 1079 * @throws MaxCountExceededException if an error occurs computing the p-value 1080 * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not 1081 * strictly positive 1082 */ 1083 protected double tTest(final double m1, final double m2, 1084 final double v1, final double v2, 1085 final double n1, final double n2) 1086 throws MaxCountExceededException, NotStrictlyPositiveException { 1087 1088 final double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2)); 1089 final double degreesOfFreedom = df(v1, v2, n1, n2); 1090 TDistribution distribution = new TDistribution(degreesOfFreedom); 1091 return 2.0 * distribution.cumulativeProbability(-t); 1092 1093 } 1094 1095 /** 1096 * Computes p-value for 2-sided, 2-sample t-test, under the assumption 1097 * of equal subpopulation variances. 
1098 * <p> 1099 * The sum of the sample sizes minus 2 is used as degrees of freedom.</p> 1100 * 1101 * @param m1 first sample mean 1102 * @param m2 second sample mean 1103 * @param v1 first sample variance 1104 * @param v2 second sample variance 1105 * @param n1 first sample n 1106 * @param n2 second sample n 1107 * @return p-value 1108 * @throws MaxCountExceededException if an error occurs computing the p-value 1109 * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not 1110 * strictly positive 1111 */ 1112 protected double homoscedasticTTest(double m1, double m2, 1113 double v1, double v2, 1114 double n1, double n2) 1115 throws MaxCountExceededException, NotStrictlyPositiveException { 1116 1117 final double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2)); 1118 final double degreesOfFreedom = n1 + n2 - 2; 1119 TDistribution distribution = new TDistribution(degreesOfFreedom); 1120 return 2.0 * distribution.cumulativeProbability(-t); 1121 1122 } 1123 1124 /** 1125 * Check significance level. 1126 * 1127 * @param alpha significance level 1128 * @throws OutOfRangeException if the significance level is out of bounds. 1129 */ 1130 private void checkSignificanceLevel(final double alpha) 1131 throws OutOfRangeException { 1132 1133 if (alpha <= 0 || alpha > 0.5) { 1134 throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL, 1135 alpha, 0.0, 0.5); 1136 } 1137 1138 } 1139 1140 /** 1141 * Check sample data. 1142 * 1143 * @param data Sample data. 1144 * @throws NullArgumentException if {@code data} is {@code null}. 1145 * @throws NumberIsTooSmallException if there is not enough sample data. 
*/
    private void checkSampleData(final double[] data)
        throws NullArgumentException, NumberIsTooSmallException {

        if (data == null) {
            throw new NullArgumentException();
        }
        // At least two observations are required.
        if (data.length >= 2) {
            return;
        }
        throw new NumberIsTooSmallException(
                LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
                data.length, 2, true);

    }

    /**
     * Check sample data.
     *
     * @param stat Statistical summary.
     * @throws NullArgumentException if {@code stat} is {@code null}.
     * @throws NumberIsTooSmallException if the summary describes fewer than
     * 2 observations.
     */
    private void checkSampleData(final StatisticalSummary stat)
        throws NullArgumentException, NumberIsTooSmallException {

        if (stat == null) {
            throw new NullArgumentException();
        }
        // At least two observations are required.
        if (stat.getN() >= 2) {
            return;
        }
        throw new NumberIsTooSmallException(
                LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
                stat.getN(), 2, true);

    }

}