Tests for random number generatorsHere we present some statistical tests (quantitative and qualitative), in order to show random number generators performing reliably. K-S testK-S test is a well known goodness of fit test that compares the \(U(0,1)\) cumulative distribution with the empirical one generated with a pseudo random number generator, particularly, quickndirty. For our purposes, \(F_i\) is the theoretical cumulative distribution of a random variable \(U(0,1)\) $$ F_i = \int_{0}^{x_i}dx = x_i $$ where The subscript i represents any discrete bin. We take the measure that we call K-S measure and as smaller (close to zero) it is, means that the random sample, that we have generated with quickndirty, has its origins in a uniform distribution.
import quickndirty as qnd
from scipy import zeros, arange, array
n = 100 # sample's size
rng = qnd.Rng()
random = rng.random
u = zeros(n+2)
u[n+1] = 1.0
for i in xrange(n):
u[i+1] = random()
u.sort()
X = []
F = []
for x in arange(0, 1, 0.002):
X.append(x)
for i in xrange(len(u)):
if x >= u[i] and x < u[i+1]:
F.append(i/float(n))
break
X = array(X)
F = array(F)
ks = max(abs(F-X))
print 'K-S Test =', ks
Chi-square testChi-square test is another goodness of fit test.
n = 100 # sample's size
k = 20 # number of bins
import random
import quickndirty as qnd
from scipy.stats import chisquare
from scipy import ones, zeros
rng = qnd.Rng()
p_obs = zeros(k)
for j in xrange(n):
u = rng.random()
for i in xrange(k):
if u >= i/float(k) and u < (i+1)/float(k):
p_obs[i] += 1/float(n)
break
p_exp = ones(k)*k/float(n)
chisq, p_value = chisquare(p_obs, p_exp)
print "sample's chi-square value =", chisq
print "sample's p-value =", p_value
Variance lawThe variance law is not a strict law, in practice, it's a consequence of the Weak Law of Large Numbers that states that: where Var(X) is the empirical variance, We have sampled n random integers between 0 and 127 to fulfill 128 bins. Each bin have an average value where Ci is the number of items inside each bin. So Var(X) is the variance of the Xi sample. The images below show the histograms where we can observe that the variance decreases as the sample size increases and the the last image shows that the variance scales with the sample size (n) following a power law with coefficient
#!/usr/bin/env python
import quickndirty as qnd
from pylab import *
rng = qnd.Rng()
x = []
y = []
for m in (100, 1000, 10000):
k = 7
n = (2**k) * m
count = zeros(2**k)
for i in xrange(n):
w = rng.getrandbits(k) # generate integers from 0 to 2**k
count[w] += 1
count /= m
sigma = var(count)
x.append(n)
y.append(sigma)
clf()
plot(count)
xlim(0,2**k-1)
ylim(0.7,1.3)
title(r"n = %d, $\sigma^2$ = %f" % (n, sigma))
savefig("histogram_%d" % n, dpi=50)
clf()
(b1, b0) = polyfit(log(x), log(y), 1)
z = exp(polyval([b1, b0], log(x)))
loglog(x, y, 'bo')
loglog(x, z, 'r--')
ylabel(r'$\sigma^2$')
xlabel('n')
title(r"$\alpha$ = %f" % b1)
savefig("variance", dpi=50)
Spatial correlations
#!/usr/bin/env python
import quickndirty as qnd
from pylab import *
rng = qnd.Rng()
random = rng.getrandbits
# parameters
p = 8
N = 100
T = 100
# computed parameters
q = p*p
y = zeros(q)
m = zeros((p,p))
n = q*N
l = int(log2(p))
f = 1.0/(n/q)
# computing loops
for t in xrange(T):
for k in xrange(n):
i = random(l)
j = random(l)
m[i][j] += f/T
# computing averages
k = 0
for i in xrange(p):
for j in xrange(p):
y[k] = m[i][j]
k += 1
# creating graphs
axhline(y=1, lw=2, color='r')
x = arange(1,q+1)
plot(x,y,'b-o',lw=2)
xlim(1, q)
ylim(0.8, 1.2)
show()
Computing pi
#!/usr/bin/env python
import quickndirty as qnd
from pylab import *
rng = qnd.Rng()
random = rng.random
def computepi(n):
a = 0.0
for t in xrange(n):
x = random()
y = random()
r = sqrt(1-x*x)
if y < r:
a += 1.0
return a*4/n
y = []
for t in xrange(1000, 101000, 1000):
y.append(computepi(t))
# creating graphs
axhline(y=pi, lw=2, color='r')
plot(y,'b-o',lw=2)
show()
Central Limit Theorem
#!/usr/bin/env python
import quickndirty as qnd
from pylab import *
rng = qnd.Rng()
random = rng.random
n = 500
bins = 20
N = n*bins
for k in (1,2,4,8):
q = sqrt(1.0/3.0) * sqrt(k)
h = zeros(N)
for t in xrange(N):
h[t] = sum( -1 + random()*2 for i in xrange(k) ) / q
clf()
hist(h, bins=bins, normed=1)
xlim(-3,3)
ylim(0,0.4)
savefig('hist_%d' % k, dpi=50)
ProjectThis project is hosted at Google: Download
Last modified October 17, 2011 2:04 am / Skin by Kevin Hughes
![]() |