# Copyright (C) 2004 Paul Harrison
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
""" MML estimators. """
from numarray import *
from numarray import random_array, linear_algebra, arrayprint
import sys
euler_constant = 0.577215664901532860606512090082402431042
def dimension_term(D):
""" Approximation of the dimension-dependent terms in MML87.
D : integer
number of dimensions
Error is of order 0.3 nit.
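        This is the constant part of the MML87 message length; Estimate.length()
        below combines it with the other terms as
            neglog_prior() - 0.5*neglog_fisher() + total_neglog_data()
            + dimension_term(dimensions())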
"""
return -0.5*D*log(2*pi) + 0.5*log(D*pi) - euler_constant
def neglog_gaussian_likelihood(N, sigma, sum_of_squares, quantum=1.0):
""" Return the total negative-log-likelihood of some data sampled from a Gaussian
source.
N : float
number of items
sigma : float
standard deviation of the source
sum_of_squares : float
sum of squares of the data
quantum : float
measurement accuracy
Note: This likelihood is a slight approximation,
as the Gaussian distribution is not discrete.
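        Consistency sketch (illustrative data; the result should agree, up to
        floating point error, with summing the per-datum version defined below):
            data = random_array.normal(0.0, 1.0, 100)
            total = neglog_gaussian_likelihood(100, 1.0, sum(data*data), 0.01)
            per_datum = individual_neglog_gaussian_likelihoods(1.0, data*data, 0.01)
            # total == sum(per_datum)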
"""
return N*(-log(quantum)+0.5*log(2*pi)+log(sigma)) + sum_of_squares/(2*sigma*sigma)
def individual_neglog_gaussian_likelihoods(sigma, squares, quantum=1.0):
""" Return the negative-log-likelihood of some data sampled from a Gaussian
source.
sigma : float
standard deviation of the source
squares : N-array of float
            squares of the individual data values
quantum : float
measurement accuracy
Note: This likelihood is a slight approximation,
as the Gaussian distribution is not discrete.
"""
return squares/(2*sigma*sigma) + (-log(quantum)+0.5*log(2*pi)+log(sigma))
def neglog_multivariate_gaussian_likelihood(N, K, concentration, total_data_outerproduct,
quantum):
""" Multivariate equivalent of neglog_gaussian_likelihood
N : float
number of items
        K : integer
size of data vectors
concentration : KxK array of floats
inverse of the covariance matrix
total_data_outerproduct : KxK array of floats
total outerproduct of the data
quantum : float
measurement accuracy (volume)
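        Sketch of forming the arguments from an NxK array "data" with a known
        "mean" and "concentration" (names here are illustrative):
            deviation = data - mean
            total_outer = sum(deviation[:,NewAxis,:] * deviation[:,:,NewAxis])
            nll = neglog_multivariate_gaussian_likelihood(
                len(data), data.shape[1], concentration, total_outer, quantum)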
"""
    # Note: sum(ravel(total_data_outerproduct*concentration))
    # equals the sum over all data points of dot(dot(datum, concentration), datum)
return (
N*(
- log(quantum)
- 0.5*log(linear_algebra.determinant(concentration))
+ 0.5*K*log(2*pi)
)
+ 0.5 * sum(ravel(total_data_outerproduct * concentration))
)
def individual_neglog_multivariate_gaussian_likelihoods(
K, concentration, data_outerproduct, quantum):
""" Multivariate equivalent of individual_neglog_gaussian_likelihoods
        K : integer
size of data vectors
concentration : KxK array of floats
inverse of the covariance matrix
data_outerproduct : NxKxK array of floats
outerproduct of each datum
quantum : float
measurement accuracy (volume)
"""
return (0.5*sum(reshape(data_outerproduct * concentration, (-1,K*K)),1)
+ (- log(quantum)
- 0.5*log(linear_algebra.determinant(concentration))
+ 0.5*K*log(2*pi)) )
class Estimate:
""" Abstract base class for MML87 estimators.
Fields:
data :
The input data.
weighting : array of floats
            If the estimate is being used as part of a mixture model, the level of
assignment of each data point to the class this estimate is for.
Otherwise, an array of ones.
data_size :
Total of "weighting", i.e. normally just the len(data).
violates_prior : boolean
Do any model parameters exceed bounds on the given prior?
Sub-classes should define:
def dimensions(self) : number of dimensions in the model
def neglog_prior(self) : negative log of the prior density
def neglog_fisher(self) : negative log of the determinant of the Fisher matrix
def neglog_data(self) : negative log likelihood of each datum
(see documentation)
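        A sub-class skeleton (hypothetical and purely illustrative, bodies elided;
        the real sub-classes below are fuller examples):
            class My_estimate(Estimate):
                parameters = [ "theta" ]
                def __init__(self, data, weighting=None):
                    Estimate.__init__(self, data, weighting)
                    self.theta = ...  # point estimate of the model parameter
                def dimensions(self): return 1
                def neglog_prior(self): return ...   # -log prior density at theta
                def neglog_fisher(self): return ...  # -log det Fisher matrix
                def neglog_data(self): return ...    # per-datum -log likelihoods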
"""
parameters = [ ] # Names of parameters in the estimate
def __init__(self, data, weighting=None):
self.data = data
        if weighting is not None:
self.weighting = weighting
else:
self.weighting = ones(len(data), Float)
self.data_size = sum(self.weighting)
self.has_prior = True
self.violates_prior = False
def __repr__(self):
""" Display the estimate's model, length, and whether it violates the prior. """
result = self.__class__.__name__ + ":"
if self.has_prior:
result += " %.2f nits" % self.length()
if self.violates_prior:
result += "\n*** Prior bounds violated! ***"
for item in self.parameters:
result += "\n%15s=" % item
data = getattr(self,item)
if isinstance(data,float):
result += "%.2f" % data
elif type(data) != type(array([])):
result += repr(data)
elif data.shape[0] > 100:
result += "< a large array of numbers >"
else:
result += arrayprint.array2string(data, precision=2, suppress_small=True) \
.replace("\n", "\n"+" "*16)
return result
def total_neglog_data(self):
return sum(self.neglog_data() * self.weighting)
def length(self):
""" Calculate the message length. """
assert self.has_prior
return self.neglog_prior() \
- 0.5*self.neglog_fisher() \
+ self.total_neglog_data() \
+ dimension_term(self.dimensions())
def test(self):
""" Simple numerical self-consistency test:
Is there any single parameter that, if altered slightly, reduces the length?
If so, the estimate is not consistent with the length calculation.
Also tests that total_neglog_data and neglog_data are consistent.
Limitation: Does not detect the length calculation being out by a constant.
"""
base_length = self.length()
any_bad = False
print "Testing"
discrepancy = abs(self.total_neglog_data() - Estimate.total_neglog_data(self))
if discrepancy > 1e-6:
print "Inconsistency between total_neglog_data and neglog_data:", discrepancy
any_bad = True
def get(position):
if shape == ():
return getattr(self,parameter)
else:
return data[position]
def set(position, value):
if shape == ():
setattr(self,parameter,value)
else:
data[position] = value
originals = { }
for parameter in self.parameters:
data = getattr(self,parameter)
if isinstance(data,float):
originals[parameter] = data
elif type(data) == type(array([])):
originals[parameter] = data.copy()
for parameter in originals:
data = getattr(self,parameter)
if isinstance(data,float):
shape = ()
elif type(data) != type(array([])):
continue
else:
shape = data.shape
positions = [ () ]
for item in shape:
new_positions = [ ]
for i in xrange(item):
new_positions.extend([ j+(i,) for j in positions ])
positions = new_positions
for position in positions:
for factor in [ 0.5, 0.9, 0.99, 1.01, 1.1, 2.0 ]:
data = getattr(self,parameter)
original = get(position)
set(position, original * factor)
self._test_prep()
l2 = self.length()
for parameter1, value in originals.items():
if type(value) == type(array([])):
value = value.copy()
setattr(self,parameter1,value)
if l2-base_length < -1e-6:
print "Length decreased: ", parameter, position, factor, l2-base_length
any_bad = True
if any_bad:
print "-- FAILED!"
else:
print "-- OK"
return not any_bad
def _test_prep(self): pass
# ^ This can be used to update any intermediate results if parameters change,
# allowing test() to properly test an estimator
class Gaussian_estimate(Estimate):
""" Gaussian distribution estimate.
        (Identical to the classical estimate, but also gives a message length.)
data : array of float
The data.
Required only if an explicit length is needed:
mean_range : (float, float)
The minimum and maximum possible values for the mean.
quantum : float
The measurement accuracy.
Also, the minimum possible sigma.
max_sigma : float
The maximum possible sigma.
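        Example (illustrative values, as in the self-test at the bottom of this
        module):
            data = random_array.normal(0.0, 1.0, 100)
            est = Gaussian_estimate(data, (-10.0,10.0), 0.01, 10.0)
            print est.mean, est.sigma
            print est.length()      # message length in nits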
"""
parameters = [ 'mean', 'sigma' ]
def __init__(self, data,
mean_range=None, quantum=None, max_sigma=None,
weighting=None):
Estimate.__init__(self, data, weighting)
self.mean = sum(self.data*self.weighting) / self.data_size
deviation = data - self.mean
self.squared_deviation = deviation*deviation
self.sum_squared_deviation = sum(self.squared_deviation*self.weighting)
self.sigma = (self.sum_squared_deviation / (self.data_size-1.0)) ** 0.5
        if mean_range is None or quantum is None or max_sigma is None:
self.has_prior = False
else:
self.has_prior = True
self.quantum = quantum
self.mean_range = mean_range
self.max_sigma = max_sigma
self.violates_prior = \
self.mean < self.mean_range[0] or \
self.mean > self.mean_range[1] or \
self.sigma < self.quantum or \
self.sigma > self.max_sigma
def dimensions(self):
# mean, sigma
return 2
def neglog_prior(self):
R_mean = self.mean_range[1] - self.mean_range[0]
R_sigma = log(self.max_sigma) - log(self.quantum)
return log(R_mean) + log(R_sigma) + log(self.sigma)
def neglog_fisher(self):
return -log(2) - 2*log(self.data_size) + 4*log(self.sigma)
def total_neglog_data(self):
return neglog_gaussian_likelihood(self.data_size, self.sigma,
self.sum_squared_deviation, self.quantum)
def neglog_data(self):
return individual_neglog_gaussian_likelihoods(
self.sigma, self.squared_deviation, self.quantum)
class Discrete_estimate(Estimate):
""" Discrete estimate.
        Given a list of items, estimate the probability of each type of item.
        Based on the multinomial estimate described in the book; however, the order
        of items is also encoded in the message.
data : array of integers
            Type of each item (0 <= item < n_types).
n_types : integer
Number of types of item.
Optional:
alpha : n_types-array of float
Conjugate prior, as though alpha[i] of each type i had already been observed.
alpha[i] = 1.0 is a uniform prior, and the default.
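        Example (illustrative):
            data = array([0, 2, 2, 1, 0, 2])
            est = Discrete_estimate(data, 3)
            print est.probability     # estimated probability of each type
            print est.length()        # message length in nits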
"""
parameters = [ "probability" ]
def __init__(self, data, n_types, alpha=None, weighting=None):
Estimate.__init__(self, data, weighting)
self.n_types = n_types
        self.conjugate_prior = (alpha is not None)
if self.conjugate_prior:
self.alpha = alpha
else:
self.alpha = ones(n_types, Float)
sum_alpha = sum(self.alpha)
self.counts = zeros(n_types, Float)
for i in xrange(len(data)):
self.counts[data[i]] += self.weighting[i]
self.probability = ( (self.counts+self.alpha-0.5)
/ (self.data_size+sum_alpha-0.5*n_types) )
def _test_prep(self):
# Used by Estimate.test()
self.probability /= sum(self.probability)
def dimensions(self):
# self.probability must add to 1.0, so the model-space has n-1 dimensions.
return self.n_types-1
def neglog_prior(self):
        # Import scipy lazily: it is only needed when a conjugate prior is used.
if self.conjugate_prior:
from scipy.special import gammaln
return ( -sum(log(self.probability) * (self.alpha-1.0))
-gammaln(sum(self.alpha))
+sum(gammaln(self.alpha)) )
else:
return -sum(log(arange(1,self.n_types)))
def neglog_fisher(self):
return ( -(self.n_types-1.0) * log(self.data_size)
+sum(log(self.probability)) )
def total_neglog_data(self):
return -sum(self.counts * log(self.probability))
def neglog_data(self):
return take(-log(self.probability), self.data)
class Multivariate_gaussian_estimate(Estimate):
""" Multivariate Gaussian distribution estimate, with a conjugate prior.
data : NxK-array of floats
The data to be encoded.
m : float, > K, default K+1.0
Number of "samples" in the prior for the covariance.
V : KxK-array of floats, default quantum**(2/K)*identity(K)*m
Total covariance matrix of the "samples", defining the prior on the covariance.
Note that this is the total, sum x_i*x_j, not (sum x_i*x_j) / (m-1)
m1 : float, > 0.0, default 1.0
Number of "samples" in the prior for the mean.
        u0 : K-array of floats, default zeros(K)
Mean of the "samples" defining the prior on the mean.
Required if an explicit length is needed:
quantum : float
*Volume* of the measurement accuracy of the data.
Note: This class requires SciPy (for gammaln function)
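        Example (illustrative; the prior is built from quantum by default):
            data = random_array.normal(0.0, 1.0, (200, 3))
            est = Multivariate_gaussian_estimate(data, quantum=0.001)
            print est.mean
            print est.covariance
            print est.length()    # requires SciPy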
"""
parameters = [ "mean", "covariance" ]
def __init__(self, data, quantum=None, m=None,V=None,m1=1.0,u0=None, weighting=None):
assert quantum is not None or (V is not None and u0 is not None)
Estimate.__init__(self, data, weighting)
self.K = data.shape[1]
if m is None:
m = self.K + 1.0
if V is None:
V = identity(self.K) * (quantum**(2.0/self.K)) * m
if u0 is None:
u0 = zeros(self.K, Float)
self.m = m
self.V = V
self.m1 = m1
self.u0 = u0
weighted_data = data * self.weighting[:,NewAxis]
self.mean = (m1*u0 + sum(weighted_data)) / (m1 + self.data_size)
deviation = data - self.mean
u0_deviation = u0 - self.mean
self.data_outerproduct = deviation[:,NewAxis,:] * deviation[:,:,NewAxis]
self.total_data_outerproduct = sum(self.weighting[:,NewAxis,NewAxis] * self.data_outerproduct)
total_covariance = m1*outerproduct(u0_deviation, u0_deviation) \
+ V + self.total_data_outerproduct
self.covariance = total_covariance / (self.data_size+m+self.K-2)
if quantum is None:
self.has_prior = False
else:
self.quantum = quantum
self.det_V = linear_algebra.determinant(self.V)
self.concentration = linear_algebra.inverse(self.covariance)
self.det_concentration = linear_algebra.determinant(self.concentration)
def _test_prep(self):
# Used by Estimate.test()
self.concentration = linear_algebra.inverse(self.covariance)
self.det_concentration = linear_algebra.determinant(self.concentration)
        deviation = self.data - self.mean
self.data_outerproduct = deviation[:,NewAxis,:] * deviation[:,:,NewAxis]
self.total_data_outerproduct = sum(self.weighting[:,NewAxis,NewAxis] * self.data_outerproduct)
def dimensions(self):
# K parameters for the mean,
# K*(K+1)/2 for the covariance (covariance matrix is symmetric)
return self.K + self.K*(self.K+1)/2
def neglog_prior(self):
from scipy.special import gammaln
h0_prior = log(self.det_concentration)
log_gamma_sum = 0.0
for i in xrange(1,self.K+1):
log_gamma_sum += gammaln( (self.m-i)/2.0 )
concentration_prior = (
log(pi) * self.K*(self.K-1)/4.0
- log(2) * self.K*(self.m-1)/2.0
+ log_gamma_sum
- log(self.det_V) * (self.m-self.K-2)/2.0
- log(self.det_concentration) * (self.m-1)/2.0
+ 0.5 * trace(dot(self.concentration, self.V))
) # (Wishart distribution)
u0_deviation = self.u0 - self.mean
mean_prior = neglog_multivariate_gaussian_likelihood(
1.0, self.K, self.m1*self.concentration,
outerproduct(u0_deviation, u0_deviation), 1.0 )
        # Was previously (incorrectly) using u0 itself rather than its deviation:
#mean_prior = neglog_multivariate_gaussian_likelihood(
# 1.0, self.K, self.m1*self.concentration,
# outerproduct(self.u0, self.u0), 1.0 )
return h0_prior + concentration_prior + mean_prior
def neglog_fisher(self):
return - (self.K + self.K*(self.K+1)/2) * log(self.data_size) \
+ self.K*log(2.0) + self.K*log(self.det_concentration)
def total_neglog_data(self):
return neglog_multivariate_gaussian_likelihood(
self.data_size, self.K, self.concentration,
self.total_data_outerproduct, self.quantum)
def neglog_data(self):
return individual_neglog_multivariate_gaussian_likelihoods(
self.K, self.concentration, self.data_outerproduct, self.quantum)
class Regression_estimate(Estimate):
""" Regression with a Gaussian prior for the model weights.
data : N-array of floats
The data to be encoded.
given_data : NxK-array of floats
Some data known by both sender and receiver,
which may be used to help transmit the data efficiently.
Each column should be normalized to have standard deviation of one and a mean
        of zero. If a constant term is required (i.e. if the data has non-zero mean),
include a column consisting entirely of ones.
m : float
The expected ratio of the variance of model weights to the variance of the
residual. m=1 would be a neutral choice.
Required only if an explicit length is needed:
quantum : float
The measurement accuracy of the data.
max_sigma : float
The maximum expected standard deviation of the residual.
A safe choice for this would be the standard deviation of the data.
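        Example (illustrative; given_data columns already have approximately zero
        mean and unit standard deviation):
            given_data = random_array.normal(0.0, 1.0, (100,2))
            data = given_data[:,0] + given_data[:,1] + random_array.normal(0.0, 1.0, 100)
            est = Regression_estimate(data, given_data, 1.0, 0.1, 10.0)
            print est.model, est.sigma
            print est.length()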
"""
parameters = [ "sigma", "model" ]
def __init__(self, data, given_data,
m=1.0, quantum=None, max_sigma=None,
weighting=None):
Estimate.__init__(self, data, weighting)
self.m = m
        self.model_size = given_data.shape[1]
        self.given_data = given_data
transposed_weighted_given_data = transpose(given_data) * self.weighting
self.left_hand_side = dot(transposed_weighted_given_data, given_data) + \
m * identity(self.model_size)
right_hand_side = dot(transposed_weighted_given_data, data)
self.model = \
linear_algebra.solve_linear_equations(self.left_hand_side, right_hand_side)
residual = data - dot(given_data, self.model)
self.squared_residual = residual*residual
self.total_squared_residual = sum(self.squared_residual*self.weighting)
model_squared = dot(self.model, self.model)
self.sigma = ((self.total_squared_residual+m*model_squared)
/ self.data_size) ** 0.5
        if quantum is None or max_sigma is None:
self.has_prior = False
else:
self.quantum = quantum
self.max_sigma = max_sigma
self.violates_prior = self.sigma < self.quantum or \
self.sigma > self.max_sigma
def _test_prep(self):
# used by Estimate.test()
        residual = self.data - dot(self.given_data, self.model)
self.squared_residual = residual*residual
self.total_squared_residual = sum(self.squared_residual*self.weighting)
def dimensions(self):
return self.model_size + 1
def neglog_prior(self):
R_sigma = log(self.max_sigma) - log(self.quantum)
model_squared = dot(self.model, self.model)
return ( log(R_sigma) + log(self.sigma)
+ neglog_gaussian_likelihood(self.model_size, self.sigma / (self.m**0.5),
model_squared, 1.0) )
def neglog_fisher(self):
return ( - log(2*(self.model_size+self.data_size))
- log(linear_algebra.determinant(self.left_hand_side))
+ (self.model_size*2+2)*log(self.sigma) )
def total_neglog_data(self):
return neglog_gaussian_likelihood(self.data_size, self.sigma,
self.total_squared_residual, self.quantum)
def neglog_data(self):
return individual_neglog_gaussian_likelihoods(
self.sigma, self.squared_residual, self.quantum)
class Hidden_factor_estimate(Estimate):
""" Model of multivariate data as having a single hidden factor,
as described in the paper "Single Factor Analysis by MML Estimation"
by C.S. Wallace and P.R. Freeman.
data : NxK-array of floats
The data to be encoded.
Required only if an explicit length is needed:
mean_range : Kx2-array of Float
Anticipated minimum and maximum values for the mean.
quantum : K-array of Float
Measurement accuracy.
max_sigma : K-array of Float
The maximum anticipated standard deviation of the residual after removal of
the mean and hidden factor. A safe choice for this would be the standard
deviation of the data.
Note: Length includes 1 bit to describe whether there is a hidden factor or not.
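        Example (illustrative, mirroring the self-test at the bottom of this module;
        construction requires SciPy for the internal optimisation):
            mean_range = array([[-10.0, 10.0]]*3)
            quantum = array([0.01]*3)
            max_sigma = array([10.0]*3)
            data = random_array.normal(0.0, 1.0, (500, 3))
            est = Hidden_factor_estimate(data, mean_range, quantum, max_sigma)
            print est.has_factor, est.factor_loads
            print est.length()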
"""
parameters = [ "mean", "sigma", "has_factor", "factor_loads", "factor_scores" ]
accuracy = 1e-4 # Allowable relative error in factor_loads
def __init__(self, data, mean_range=None, quantum=None, max_sigma=None, weighting=None):
Estimate.__init__(self, data, weighting)
self.K = data.shape[1]
weighted_data = transpose(transpose(data)*self.weighting)
self.mean = sum(weighted_data) / self.data_size
deviation = data - self.mean
data_outerproduct = sum( self.weighting[:,NewAxis,NewAxis]
* deviation[:,:,NewAxis] * deviation[:,NewAxis,:] )
sum_squares = diagonal(data_outerproduct)
root_sum_squares = sqrt(sum_squares)
correlation = data_outerproduct / outerproduct(root_sum_squares,root_sum_squares)
# NOTE: inefficient, only need largest
eigenvalues, eigenvectors = linear_algebra.eigenvectors(correlation)
largest = argmax(eigenvalues)
b = eigenvectors[largest]
b = b * sqrt(eigenvalues[largest] / dot(b,b))
def length(b): # (omits constant terms)
bb = b * b
b2 = sum(bb)
N_1 = self.data_size - 1.0
sigma = sqrt(sum_squares / ((bb+1.0)*(self.data_size-1.0)))
v_scale = (1.0-self.K/((self.data_size-1.0)*b2)) / (1+b2)
#w = deviation / sigma
#v = dot(w, b) * v_scale
#v2 = dot(v,v)
#residual = w - b[NewAxis,:]*v[:,NewAxis]
# ... however we can calculate what we need using just the covariance matrix:
Y = data_outerproduct / outerproduct(sigma,sigma)
Ybb = sum(sum(Y * outerproduct(b,b)))
wbv = Ybb * v_scale
v2 = wbv * v_scale
residual2 = (b2+1.0)*N_1 - 2*wbv + b2*v2
return N_1*sum(log(sigma)) \
+ 0.5*self.K*log(max(1e-30, self.data_size*v2)) \
+ 0.5*(N_1+self.K)*log(1+b2) \
+ 0.5*v2 \
+ 0.5*residual2
from scipy.optimize import fmin
b = fmin(length, b, xtol=self.accuracy, ftol=1e-10, disp=0)
b2 = dot(b,b)
if b2 * (1-self.accuracy*2)**2 * (self.data_size-1.0) <= self.K:
b[:] = 0.0
b2 = 0.0
self.sigma = sqrt(sum_squares / ((b*b+1.0)*(self.data_size-1.0)))
#Alternative calculation method: the iteration described in Wallace&Freeman
#while 1:
# bb = b * b
# b2 = sum(bb)
# if b2 * (self.data_size-1.0) < self.K:
# b[:] = 0.0
# b2 = 0.0
#
# self.sigma = sqrt(sum_squares / ((bb+1.0)*(self.data_size-1.0)))
#
# if b2 == 0.0: break
#
# Y = data_outerproduct / outerproduct(self.sigma,self.sigma)
#
# old_b = b
#
# b = ( dot(Y,b)
# * ( (1.0-self.K/((self.data_size-1.0)*b2))
# / ((self.data_size-1.0)*(1.0+b2)) ) )
#
# difference = b-old_b
# if dot(difference,difference) < self.sufficiently_close**2:
# break
        # The overall sign of b is arbitrary; flip it for consistency (irrelevant, but nicer).
if sum(b) < 0.0: b *= -1.0
self.factor_loads = b * self.sigma
self.has_factor = (b2 > 0.0)
if self.has_factor:
self.factor_scores = dot(deviation/self.sigma,b) \
* ( (1.0-self.K/((self.data_size-1.0)*b2)) / (1+b2) )
else:
self.parameters = self.parameters[:4]
self.factor_scores = zeros(self.data.shape[0], Float)
        if quantum is None or max_sigma is None or mean_range is None:
self.has_prior = False
else:
self.quantum = quantum
self.max_sigma = max_sigma
self.mean_range = mean_range
for i in xrange(self.K):
if not mean_range[i,0] < self.mean[i] < mean_range[i,1] or \
not quantum[i] < self.sigma[i] < max_sigma[i]:
self.violates_prior = True
def dimensions(self):
if not self.has_factor:
return self.K * 2
# mean, sigma, factor_loads, factor_scores
return self.K * 3 + self.data_size
def neglog_prior(self):
neglog_hasfactor_prior = -log(0.5)
neglog_mean_prior = sum(log(self.mean_range[:,1] - self.mean_range[:,0]))
neglog_sigma_prior = ( sum(log(log(self.max_sigma)-log(self.quantum)))
+ sum(log(self.sigma)) )
if not self.has_factor:
return ( neglog_hasfactor_prior + neglog_mean_prior + neglog_sigma_prior )
def B(K):
if K == 1: return pi/2
elif K == 2: return pi
else: return 2.0*pi*B(K-2)/(K-1.0)
b = self.factor_loads / self.sigma
neglog_factor_loading_prior = ( log(B(self.K))
+ (self.K-1)/2.0 * log(1+dot(b,b)) )
sum_square_factor_scores = sum(self.factor_scores*self.factor_scores*self.weighting)
neglog_factor_score_prior = neglog_gaussian_likelihood(
self.data_size,1.0, sum_square_factor_scores, 1.0)
return ( neglog_hasfactor_prior + neglog_mean_prior + neglog_sigma_prior +
neglog_factor_loading_prior + neglog_factor_score_prior )
def neglog_fisher(self):
if not self.has_factor:
return self.K*(-log(2)-2*log(self.data_size)) + 4*sum(log(self.sigma))
weighted_scores = self.weighting * self.factor_scores
sum_square_factor_scores = dot(weighted_scores,self.factor_scores)
sum_factor_scores = sum(weighted_scores)
b = self.factor_loads / self.sigma
return ( -self.K * log(2.0*self.data_size)
-self.K * log(self.data_size*sum_square_factor_scores - sum_factor_scores)
-(self.data_size-2.0) * log(1 + dot(b,b))
+6.0*sum(log(self.sigma)) )
def neglog_data(self):
result = zeros(len(self.data), Float)
for i in xrange(self.K):
column = ( self.data[:,i]
- self.mean[i]
- self.factor_loads[i]*self.factor_scores )
result += individual_neglog_gaussian_likelihoods(
self.sigma[i], column*column, self.quantum[i] )
return result
if __name__ == "__main__":
    # Run tests on the various estimators when run as a script rather than imported.
if 1:
data = random_array.normal(10.0, 1.0, 100)
model = Gaussian_estimate(data, (-100.0,100.0), 0.1, 0.5)
print model
model.test()
print
model = Gaussian_estimate(data, (-100.0,100.0), 0.1, 1000.0)
print model
model.test()
print
if 1:
data = random_array.random_integers(4,0, 100)
model = Discrete_estimate(data, 5)
print model
model.test()
print
model = Discrete_estimate(data, 5, array([0.25,1.0,1.0,1.0,10.0]))
print model
model.test()
print
if 1:
data = random_array.normal(5.0, 1.0, (10000, 5))
u0 = zeros(5, Float)
V = identity(5, Float)
model = Multivariate_gaussian_estimate(data, 1.0, 6.0, V, 1.0, u0)
print model
model.test()
print
if 1:
given_data = random_array.normal(0.0, 1.0, (100,2))
data = given_data[:,0] + given_data[:,1] + random_array.normal(0.0, 1.0, 100)
model = Regression_estimate(data, given_data, 1.0, 0.1, 10.0)
print model
model.test()
print
if 1:
mean_range = array([[-10.0, 10.0]]*5)
quantum = array([0.01]*5)
max_sigma = array([10.0]*5)
data = random_array.normal(5.0, 0.1, (1000, 5))
model = Hidden_factor_estimate(data, mean_range, quantum, max_sigma)
print model
total = 0.0
for i in xrange(5):
temp = Gaussian_estimate(data[:,i], (-10.0,10.0), 0.01, 10.0)
total += temp.length()
print "As 5 independent Gaussians would yield length %.2f" % (total)
model.test()
print
data += outerproduct(random_array.normal(0.0, 1.0, 1000), [1.0,2.0,0.0,0.0,-0.5])
model = Hidden_factor_estimate(data, mean_range, quantum, max_sigma)
print model
total = 0.0
for i in xrange(5):
temp = Gaussian_estimate(data[:,i], (-10.0,10.0), 0.01, 10.0)
total += temp.length()
print "As 5 independent Gaussians would yield length %.2f" % (total)
model.test()
print