Skip to content

Commit 985d5c6

Browse files
committed
update study scripts
1 parent ec2e2db commit 985d5c6

File tree

3 files changed

+149
-0
lines changed

3 files changed

+149
-0
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Author: Jake VanderPlas
2+
# License: BSD
3+
# The figure produced by this code is published in the textbook
4+
# "Statistics, Data Mining, and Machine Learning in Astronomy" (2013)
5+
# For more information, see http://astroML.github.com
6+
# To report a bug or issue, use the following forum:
7+
# https://groups.google.com/forum/#!forum/astroml-general
8+
9+
import numpy as np
10+
from scipy.stats import binom
11+
from matplotlib import pyplot as plt
12+
13+
# ----------------------------------------------------------------------
14+
# This function adjusts matplotlib settings for a uniform feel in the textbook.
15+
# Note that with usetex=True, fonts are rendered with LaTeX. This may
16+
# result in an error if LaTeX is not installed on your system. In that case,
17+
# you can set usetex to False.
18+
from astroML.plotting import setup_text_plots
19+
20+
setup_text_plots(fontsize=8, usetex=True)
21+
22+
# ------------------------------------------------------------
23+
# Define the distribution parameters to be plotted
24+
n_values = [255, 255]
25+
b_values = [1.0 / 6]
26+
linestyles = ['-']
27+
x = np.arange(-1, 200)
28+
29+
# ------------------------------------------------------------
30+
# plot the distributions
31+
fig, ax = plt.subplots(figsize=(5, 3.75))
32+
33+
for (n, b, ls) in zip(n_values, b_values, linestyles):
34+
# create a binomial distribution
35+
dist = binom(n, b)
36+
37+
plt.plot(x, dist.pmf(x), ls=ls, c='black',
38+
label=r'$b=%.1f,\ n=%i$' % (b, n), linestyle='steps-mid')
39+
40+
plt.xlim(-0.5, 256)
41+
plt.ylim(0, 0.2)
42+
43+
plt.xlabel('$x$')
44+
plt.ylabel(r'$p(x|b, n)$')
45+
plt.title('Binomial Distribution')
46+
47+
plt.legend()
48+
plt.show()
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
from scipy import stats
2+
import scipy
3+
from scipy.stats import distributions
4+
5+
import numpy as np
6+
7+
8+
def binom_test(x, n=None, p=0.5, alternative='two-sided'):
9+
print x, n, p , alternative
10+
"""
11+
Perform a test that the probability of success is p.
12+
13+
This is an exact, two-sided test of the null hypothesis
14+
that the probability of success in a Bernoulli experiment
15+
is `p`.
16+
17+
Parameters
18+
----------
19+
x : integer or array_like
20+
the number of successes, or if x has length 2, it is the
21+
number of successes and the number of failures.
22+
n : integer
23+
the number of trials. This is ignored if x gives both the
24+
number of successes and failures
25+
p : float, optional
26+
The hypothesized probability of success. 0 <= p <= 1. The
27+
default value is p = 0.5
28+
29+
Returns
30+
-------
31+
p-value : float
32+
The p-value of the hypothesis test
33+
34+
References
35+
----------
36+
.. [1] http://en.wikipedia.org/wiki/Binomial_test
37+
38+
"""
39+
x = scipy.atleast_1d(x).astype(np.integer)
40+
if len(x) == 2:
41+
n = x[1] + x[0]
42+
x = x[0]
43+
elif len(x) == 1:
44+
x = x[0]
45+
if n is None or n < x:
46+
raise ValueError("n must be >= x")
47+
n = np.int_(n)
48+
else:
49+
raise ValueError("Incorrect length for x.")
50+
51+
if (p > 1.0) or (p < 0.0):
52+
raise ValueError("p must be in range [0,1]")
53+
54+
if alternative not in ('two-sided', 'less', 'greater'):
55+
raise ValueError("alternative not recognized\n"
56+
"should be 'two-sided', 'less' or 'greater'")
57+
58+
if alternative == 'less':
59+
pval = distributions.binom.cdf(x, n, p)
60+
return pval
61+
62+
if alternative == 'greater':
63+
pval = distributions.binom.sf(x - 1, n, p)
64+
return pval
65+
66+
# if alternative was neither 'less' nor 'greater', then it's 'two-sided'
67+
d = distributions.binom.pmf(x, n, p)
68+
rerr = 1 + 1e-7
69+
if x == p * n:
70+
# special case as shortcut, would also be handled by `else` below
71+
pval = 1.
72+
elif x < p * n:
73+
i = np.arange(np.ceil(p * n), n + 1)
74+
y = np.sum(distributions.binom.pmf(i, n, p) <= d * rerr, axis=0)
75+
pval = (distributions.binom.cdf(x, n, p) +
76+
distributions.binom.sf(n - y, n, p))
77+
else:
78+
i = np.arange(np.floor(p * n) + 1)
79+
y = np.sum(distributions.binom.pmf(i, n, p) <= d * rerr, axis=0)
80+
pval = (distributions.binom.cdf(y - 1, n, p) +
81+
distributions.binom.sf(x - 1, n, p))
82+
83+
return min(1.0, pval)
84+
85+
86+
print scipy.stats.binom_test(51, 235, 1.0 / 6, alternative='greater')
87+
print binom_test(51, 235, 1.0 / 6, alternative='greater')
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from scipy.integrate import quad
2+
import numpy as np
3+
from scipy.stats import *
4+
5+
6+
def chi2_pdf(x):
7+
return chi2.pdf(x, df=1)
8+
9+
10+
ans0, err0 = quad(norm.pdf, 0.6, np.inf)
11+
ans1, err1 = quad(chi2_pdf, 0.6, np.inf)
12+
13+
print ans0, err0
14+
print ans1, err1

0 commit comments

Comments
 (0)