/* testgauss.c - statistical tests for gaussian random numbers
 *
 * Copyright (C) 2005  Jochen Voss.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * $Id: testgauss.c 6509 2005-07-07 18:31:10Z voss $
 */

#ifdef HAVE_CONFIG_H
#  include <config.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <limits.h>
#include <assert.h>

#include <gsl/gsl_rng.h>


extern  double  gsl_ran_gaussian_ziggurat (gsl_rng *r, const double sigma);


static  double  alpha = 0.0001;	/* level of tests */
static  gsl_rng *rng;

/**********************************************************************
 * testing framework
 */

typedef  double (*n_test_fn) (void *client_data);
typedef  int (*b_test_fn) (void *client_data);

static double
Phi (double x)
/* The distribution function of the standard normal distribution.  */
{
  return  (1+erf (x/M_SQRT2))/2;
}

static double
limit (double q)
/* the inverse of the distribution function (found by bisection) */
{
  double  l, r;
  
  assert (q < 0.5);

  l = -1;  r = 0;
  while (Phi(l) > q)  r = l, l -= 1;
  /* Phi(l) <= q && q < Phi(r) */
  while (l+1e-6 < r) {
    double  m = 0.5*(l+r);
    if (Phi(m) <= q) {
      l = m;
    } else {
      r = m;
    }
  }
  return  -l;
}

static unsigned
sample_size (double t, double v, double d)
/* Return the number of iterations needed to make effects of size D visible.
 * T is the 1-alpha/2 quantile, V the variance.  */
{
  double  x;

  x = t*sqrt(v)/d;
  assert (x*x <= UINT_MAX);
  return  x*x + 0.5;
}

void
normal_test (n_test_fn f, void *client_data, double m, double dm, double v,
	     const char *msg, int *fail_flag)
/* Test for the mean M of a gaussion distribution with known variance V.
 * The hypothesis is that the values are in [m-dm, m+dm]. */
{
  double  s, t, d;
  unsigned  n, steps;

  printf ("%s ... ", msg);
  fflush (NULL);

  assert (v > 0);
  t = limit (alpha/2);
  steps = sample_size (t, v, dm);
  s = 0;
  for (n=0; n<steps; ++n) {
    double  x = f (client_data);
    s += x-m;
  }
  s /= n;
  d = t*sqrt(v/n);
  
  if (fabs (s) <= d) {
    puts ("passed");
  } else {
    puts ("FAILED");
    fprintf (stderr,
	     "    Parameter %g is outside the confidence interval [%g; %g]\n"
	     "    This may be due to statistical fluctuations, so try again.\n"
	     "    In the long run this test should fail 1 of %d times.\n",
	     m, s-d, s+d, (int)(1/alpha+0.5));
    *fail_flag = 1;
  }
}

void
binomial_test (b_test_fn f, void *client_data, double p, double dp,
	       const char *msg, int *fail_flag)
/* Test for the parameter P of a binomial distribution.  */
{
  double  e, s, x, t;
  unsigned  n, k, steps;

  printf ("%s ... ", msg);
  fflush (NULL);
  
  assert (dp > 0);
  assert (0 <= p-dp && p+dp <= 1);
  t = limit (alpha/2);
  steps = sample_size (t, p*(1-p), dp);
  for (n=0, k=0; n<steps; ++n) {
    if (f (client_data))  ++k;
  }

  assert (n*p*(1-p) >= 9);
  e = n*p;
  s = sqrt (n*p*(1-p));
  x = (k-e)/s;
  
  if (fabs (x) <= t) {
    puts ("passed");
  } else {
    puts ("FAILED");
    fprintf (stderr,
	     "    Parameter %g is outside the confidence interval [%g; %g]\n"
	     "    This may be due to statistical fluctuations, so try again.\n"
	     "    In the long run this test should fail 1 of %d times.\n",
	     p, (k-t*s)/n, (k+t*s)/n, (int)(1/alpha+0.5));
    *fail_flag = 1;
  }
}

/**********************************************************************
 * individual tests
 */

static double
test1 (void *client_data)
{
  return  gsl_ran_gaussian_ziggurat (rng, M_SQRT2);
}

static int
test2 (void *client_data)
{
  return  (gsl_ran_gaussian_ziggurat (rng, M_SQRT2) >= 1);
}

static int
test3 (void *client_data)
{
  return  (gsl_ran_gaussian_ziggurat (rng, M_SQRT2) >= 3);
}

static double
test4 (void *client_data)
{
  double  x1 = gsl_ran_gaussian_ziggurat (rng, 1.09544511501); /* sqrt(1.2) */
  double  x2 = gsl_ran_gaussian_ziggurat (rng, 1.67332005307); /* sqrt(2.8) */
  return   x1 - x2;
}

static int
test5 (void *client_data)
{
  double  x1 = gsl_ran_gaussian_ziggurat (rng, 1.09544511501); /* sqrt(1.2) */
  double  x2 = gsl_ran_gaussian_ziggurat (rng, 1.67332005307); /* sqrt(2.8) */
  return  (x1-x2 <= -1);
}


int
main ()
{
  const gsl_rng_type *T;
  int  fail = 0;

  gsl_rng_env_setup();
  T = gsl_rng_default;
  rng = gsl_rng_alloc (T);
  
  normal_test (test1, NULL, 0, 0.003, 1, "mean of N(0,2)", &fail);
  binomial_test (test2, NULL, 0.2397500611, 0.0005, "variance of N(0,2)",
		 &fail);
  binomial_test (test3, NULL, 0.0169474268, 0.0001, "tails of N(0,2)",
		 &fail);
  normal_test (test4, NULL, 0, 0.005, 4, "mean of pair differences", &fail);
  binomial_test (test5, NULL, 0.3085375387, 0.001,
		 "variance of pair differences",
		 &fail);
  
  return  fail;
}
