```
#    Copyright (C) 2004 Paul Harrison
#    This program is free software; you can redistribute it and/or modify
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

""" MML estimators. """

from numarray import *
from numarray import random_array, linear_algebra, arrayprint

import sys

euler_constant = 0.577215664901532860606512090082402431042

def dimension_term(D):
""" Approximation of the dimension-dependent terms in MML87.

D : integer
number of dimensions

Error is of order 0.3 nit.
"""
return -0.5*D*log(2*pi) + 0.5*log(D*pi) - euler_constant

def neglog_gaussian_likelihood(N, sigma, sum_of_squares, quantum=1.0):
""" Return the total negative-log-likelihood of some data sampled from a Gaussian
source.

N : float
number of items

sigma : float
standard deviation of the source

sum_of_squares : float
sum of squares of the data

quantum : float
measurement accuracy

Note: This likelihood is a slight approximation,
as the Gaussian distribution is not discrete.
"""

return N*(-log(quantum)+0.5*log(2*pi)+log(sigma)) + sum_of_squares/(2*sigma*sigma)

def individual_neglog_gaussian_likelihoods(sigma, squares, quantum=1.0):
""" Return the negative-log-likelihood of some data sampled from a Gaussian
source.

sigma : float
standard deviation of the source

squares : N-array of float
sum of squares of the data

quantum : float
measurement accuracy

Note: This likelihood is a slight approximation,
as the Gaussian distribution is not discrete.
"""

return squares/(2*sigma*sigma) + (-log(quantum)+0.5*log(2*pi)+log(sigma))

def neglog_multivariate_gaussian_likelihood(N, K, concentration, total_data_outerproduct,
quantum):
""" Multivariate equivalent of neglog_gaussian_likelihood

N : float
number of items

K : float
size of data vectors

concentration : KxK array of floats
inverse of the covariance matrix

total_data_outerproduct : KxK array of floats
total outerproduct of the data

quantum : float
measurement accuracy (volume)

"""

# Note: sum(ravel(total_data_outerproduct*concentration))
#       equals dot(dot(transpose(data),concentration,data)

return (
N*(
- log(quantum)
- 0.5*log(linear_algebra.determinant(concentration))
+ 0.5*K*log(2*pi)
)
+ 0.5 * sum(ravel(total_data_outerproduct * concentration))
)

def individual_neglog_multivariate_gaussian_likelihoods(
K, concentration, data_outerproduct, quantum):
""" Multivariate equivalent of individual_neglog_gaussian_likelihoods

K : float
size of data vectors

concentration : KxK array of floats
inverse of the covariance matrix

data_outerproduct : NxKxK array of floats
outerproduct of each datum

quantum : float
measurement accuracy (volume)

"""

return (0.5*sum(reshape(data_outerproduct * concentration, (-1,K*K)),1)
+ (- log(quantum)
- 0.5*log(linear_algebra.determinant(concentration))
+ 0.5*K*log(2*pi)) )

class Estimate:
""" Abstract base class for MML87 estimators.

Fields:

data :
The input data.

weighting : array of floats
If estimate is being used as part of a mixture model, the level of
assignment of each data point to the class this estimate is for.
Otherwise, an array of ones.

data_size :
Total of "weighting", i.e. normally just the len(data).

violates_prior : boolean
Do any model parameters exceed bounds on the given prior?

Sub-classes should define:

def dimensions(self) : number of dimensions in the model

def neglog_prior(self) : negative log of the prior density

def neglog_fisher(self) : negative log of the determinant of the Fisher matrix

def neglog_data(self) : negative log likelihood of each datum

(see documentation)
"""

parameters = [ ] # Names of parameters in the estimate

def __init__(self, data, weighting=None):
self.data = data

if weighting != None:
self.weighting = weighting
else:
self.weighting = ones(len(data), Float)

self.data_size = sum(self.weighting)

self.has_prior = True
self.violates_prior = False

def __repr__(self):
""" Display the estimate's model, length, and whether it violates the prior. """

result = self.__class__.__name__ + ":"

if self.has_prior:
result += " %.2f nits" % self.length()

if self.violates_prior:
result += "\n*** Prior bounds violated! ***"

for item in self.parameters:
result += "\n%15s=" % item
data = getattr(self,item)
if isinstance(data,float):
result += "%.2f" % data
elif type(data) != type(array([])):
result += repr(data)
elif data.shape[0] > 100:
result += "< a large array of numbers >"
else:
result += arrayprint.array2string(data, precision=2, suppress_small=True) \
.replace("\n", "\n"+" "*16)

return result

def total_neglog_data(self):
return sum(self.neglog_data() * self.weighting)

def length(self):
""" Calculate the message length. """
assert self.has_prior

return   self.neglog_prior() \
- 0.5*self.neglog_fisher() \
+ self.total_neglog_data() \
+ dimension_term(self.dimensions())

def test(self):
""" Simple numerical self-consistency test:

Is there any single parameter that, if altered slightly, reduces the length?
If so, the estimate is not consistent with the length calculation.

Also tests that total_neglog_data and neglog_data are consistent.

Limitation: Does not detect the length calculation being out by a constant.
"""
base_length = self.length()
print "Testing"

discrepancy = abs(self.total_neglog_data() - Estimate.total_neglog_data(self))
if discrepancy > 1e-6:
print "Inconsistency between total_neglog_data and neglog_data:", discrepancy

def get(position):
if shape == ():
return getattr(self,parameter)
else:
return data[position]

def set(position, value):
if shape == ():
setattr(self,parameter,value)
else:
data[position] = value

originals = { }
for parameter in self.parameters:
data = getattr(self,parameter)
if isinstance(data,float):
originals[parameter] = data
elif type(data) == type(array([])):
originals[parameter] = data.copy()

for parameter in originals:
data = getattr(self,parameter)
if isinstance(data,float):
shape = ()
elif type(data) != type(array([])):
continue
else:
shape = data.shape

positions = [ () ]
for item in shape:
new_positions = [ ]
for i in xrange(item):
new_positions.extend([ j+(i,) for j in positions ])
positions = new_positions

for position in positions:
for factor in [ 0.5, 0.9, 0.99, 1.01, 1.1, 2.0 ]:
data = getattr(self,parameter)
original = get(position)
set(position, original * factor)
self._test_prep()
l2 = self.length()

for parameter1, value in originals.items():
if type(value) == type(array([])):
value = value.copy()
setattr(self,parameter1,value)

if l2-base_length < -1e-6:
print "Length decreased: ", parameter, position, factor, l2-base_length

print "-- FAILED!"
else:
print "-- OK"

def _test_prep(self): pass
# ^ This can be used to update any intermediate results if parameters change,
#   allowing test() to properly test an estimator

class Gaussian_estimate(Estimate):
""" Gaussian distribution estimate.
(Identical to classical estimate, but gives a message length.)

data : array of float
The data.

Required only if an explicit length is needed:

mean_range : (float, float)
The minimum and maximum possible values for the mean.

quantum : float
The measurement accuracy.
Also, the minimum possible sigma.

max_sigma : float
The maximum possible sigma.

"""

parameters = [ 'mean', 'sigma' ]

def __init__(self, data,
mean_range=None, quantum=None, max_sigma=None,
weighting=None):
Estimate.__init__(self, data, weighting)

self.mean = sum(self.data*self.weighting) / self.data_size

deviation = data - self.mean
self.squared_deviation = deviation*deviation
self.sum_squared_deviation = sum(self.squared_deviation*self.weighting)

self.sigma = (self.sum_squared_deviation / (self.data_size-1.0)) ** 0.5

if mean_range == None or quantum == None or max_sigma == None:
self.has_prior = False
else:
self.has_prior = True
self.quantum = quantum
self.mean_range = mean_range
self.max_sigma = max_sigma

self.violates_prior = \
self.mean < self.mean_range[0] or \
self.mean > self.mean_range[1] or \
self.sigma < self.quantum or \
self.sigma > self.max_sigma

def dimensions(self):
# mean, sigma
return 2

def neglog_prior(self):
R_mean = self.mean_range[1] - self.mean_range[0]
R_sigma = log(self.max_sigma) - log(self.quantum)

return log(R_mean) + log(R_sigma) + log(self.sigma)

def neglog_fisher(self):
return -log(2) - 2*log(self.data_size) + 4*log(self.sigma)

def total_neglog_data(self):
return neglog_gaussian_likelihood(self.data_size, self.sigma,
self.sum_squared_deviation, self.quantum)

def neglog_data(self):
return individual_neglog_gaussian_likelihoods(
self.sigma, self.squared_deviation, self.quantum)

class Discrete_estimate(Estimate):
""" Discrete estimate.
Given a list of items, estimate the likelihood of each type of item.

Based on multinomial estimate described in the book, however the order
of items is also encoded in the message.

data : array of integers
Type of each item (0 <= item <= n_types).

n_types : integer
Number of types of item.

Optional:

alpha : n_types-array of float
Conjugate prior, as though alpha[i] of each type i had already been observed.
alpha[i] = 1.0 is a uniform prior, and the default.
"""

parameters = [ "probability" ]

def __init__(self, data, n_types, alpha=None, weighting=None):
Estimate.__init__(self, data, weighting)

self.n_types = n_types

self.conjugate_prior = (alpha != None)
if self.conjugate_prior:
self.alpha = alpha
else:
self.alpha = ones(n_types, Float)

sum_alpha = sum(self.alpha)

self.counts = zeros(n_types, Float)
for i in xrange(len(data)):
self.counts[data[i]] += self.weighting[i]

self.probability = ( (self.counts+self.alpha-0.5)
/ (self.data_size+sum_alpha-0.5*n_types) )

def _test_prep(self):
# Used by Estimate.test()
self.probability /= sum(self.probability)

def dimensions(self):
# self.probability must add to 1.0, so the model-space has n-1 dimensions.
return self.n_types-1

def neglog_prior(self):
# Makes no actual difference,
# just don't depend on scipy unless necessary.
if self.conjugate_prior:
from scipy.special import gammaln

return ( -sum(log(self.probability) * (self.alpha-1.0))
-gammaln(sum(self.alpha))
+sum(gammaln(self.alpha)) )
else:
return -sum(log(arange(1,self.n_types)))

def neglog_fisher(self):
return ( -(self.n_types-1.0) * log(self.data_size)
+sum(log(self.probability)) )

def total_neglog_data(self):
return -sum(self.counts * log(self.probability))

def neglog_data(self):
return take(-log(self.probability), self.data)

class Multivariate_gaussian_estimate(Estimate):
""" Multivariate Gaussian distribution estimate, with a conjugate prior.

data : NxK-array of floats
The data to be encoded.

m : float, > K, default K+1.0
Number of "samples" in the prior for the covariance.

V : KxK-array of floats, default quantum**(2/K)*identity(K)*m
Total covariance matrix of the "samples", defining the prior on the covariance.
Note that this is the total, sum x_i*x_j, not (sum x_i*x_j) / (m-1)

m1 : float, > 0.0, default 1.0
Number of "samples" in the prior for the mean.

u0 : K-array of floats (u0)
Mean of the "samples" defining the prior on the mean.

Required if an explicit length is needed:

quantum : float
*Volume* of the measurement accuracy of the data.

Note: This class requires SciPy (for gammaln function)
"""

parameters = [ "mean", "covariance" ]

def __init__(self, data, quantum=None, m=None,V=None,m1=1.0,u0=None, weighting=None):
assert quantum is not None or (V is not None and u0 is not None)

Estimate.__init__(self, data, weighting)

self.K = data.shape[1]

if m is None:
m = self.K + 1.0

if V is None:
V = identity(self.K) * (quantum**(2.0/self.K)) * m

if u0 is None:
u0 = zeros(self.K, Float)

self.m = m
self.V = V
self.m1 = m1
self.u0 = u0

weighted_data = data * self.weighting[:,NewAxis]
self.mean = (m1*u0 + sum(weighted_data)) / (m1 + self.data_size)

deviation = data - self.mean
u0_deviation = u0 - self.mean

self.data_outerproduct = deviation[:,NewAxis,:] * deviation[:,:,NewAxis]

self.total_data_outerproduct = sum(self.weighting[:,NewAxis,NewAxis] * self.data_outerproduct)

total_covariance = m1*outerproduct(u0_deviation, u0_deviation) \
+ V + self.total_data_outerproduct

self.covariance = total_covariance / (self.data_size+m+self.K-2)

if quantum is None:
self.has_prior = False
else:
self.quantum = quantum

self.det_V = linear_algebra.determinant(self.V)
self.concentration = linear_algebra.inverse(self.covariance)
self.det_concentration = linear_algebra.determinant(self.concentration)

def _test_prep(self):
# Used by Estimate.test()
self.concentration = linear_algebra.inverse(self.covariance)
self.det_concentration = linear_algebra.determinant(self.concentration)
deviation = data - self.mean
self.data_outerproduct = deviation[:,NewAxis,:] * deviation[:,:,NewAxis]
self.total_data_outerproduct = sum(self.weighting[:,NewAxis,NewAxis] * self.data_outerproduct)

def dimensions(self):
# K parameters for the mean,
# K*(K+1)/2 for the covariance (covariance matrix is symmetric)
return self.K + self.K*(self.K+1)/2

def neglog_prior(self):
from scipy.special import gammaln

h0_prior = log(self.det_concentration)

log_gamma_sum = 0.0
for i in xrange(1,self.K+1):
log_gamma_sum += gammaln( (self.m-i)/2.0 )

concentration_prior = (
log(pi) * self.K*(self.K-1)/4.0
- log(2) * self.K*(self.m-1)/2.0
+ log_gamma_sum
- log(self.det_V) * (self.m-self.K-2)/2.0
- log(self.det_concentration) * (self.m-1)/2.0
+ 0.5 * trace(dot(self.concentration, self.V))
) # (Wishart distribution)

u0_deviation = self.u0 - self.mean
mean_prior = neglog_multivariate_gaussian_likelihood(
1.0, self.K, self.m1*self.concentration,
outerproduct(u0_deviation, u0_deviation), 1.0 )

# Was... (oops)
#mean_prior = neglog_multivariate_gaussian_likelihood(
#        1.0, self.K, self.m1*self.concentration,
#        outerproduct(self.u0, self.u0), 1.0 )

return h0_prior + concentration_prior + mean_prior

def neglog_fisher(self):
return - (self.K + self.K*(self.K+1)/2) * log(self.data_size) \
+ self.K*log(2.0) + self.K*log(self.det_concentration)

def total_neglog_data(self):
return neglog_multivariate_gaussian_likelihood(
self.data_size, self.K, self.concentration,
self.total_data_outerproduct, self.quantum)

def neglog_data(self):
return individual_neglog_multivariate_gaussian_likelihoods(
self.K, self.concentration, self.data_outerproduct, self.quantum)

class Regression_estimate(Estimate):
""" Regression with a Gaussian prior for the model weights.

data : N-array of floats
The data to be encoded.

given_data : NxK-array of floats
Some data known by both sender and receiver,
which may be used to help transmit the data efficiently.
Each column should be normalized to have standard deviation of one and a mean
of zero. If a constant term is required (ie if the data has non-zero mean),
include a column consisting entirely of ones.

m : float
The expected ratio of the variance of model weights to the variance of the
residual. m=1 would be a neutral choice.

Required only if an explicit length is needed:

quantum : float
The measurement accuracy of the data.

max_sigma : float
The maximum expected standard deviation of the residual.
A safe choice for this would be the standard deviation of the data.

"""

parameters = [ "sigma", "model" ]

def __init__(self, data, given_data,
m=1.0, quantum=None, max_sigma=None,
weighting=None):
Estimate.__init__(self, data, weighting)

self.m = m

self.model_size = given_data.shape[1]

transposed_weighted_given_data = transpose(given_data) * self.weighting

self.left_hand_side = dot(transposed_weighted_given_data, given_data) + \
m * identity(self.model_size)

right_hand_side = dot(transposed_weighted_given_data, data)
self.model = \
linear_algebra.solve_linear_equations(self.left_hand_side, right_hand_side)

residual = data - dot(given_data, self.model)
self.squared_residual = residual*residual
self.total_squared_residual = sum(self.squared_residual*self.weighting)

model_squared = dot(self.model, self.model)
self.sigma = ((self.total_squared_residual+m*model_squared)
/ self.data_size) ** 0.5

if quantum == None or max_sigma == None:
self.has_prior = False
else:
self.quantum = quantum
self.max_sigma = max_sigma

self.violates_prior = self.sigma < self.quantum or \
self.sigma > self.max_sigma

def _test_prep(self):
# used by Estimate.test()
residual = data - dot(given_data, self.model)
self.squared_residual = residual*residual
self.total_squared_residual = sum(self.squared_residual*self.weighting)

def dimensions(self):
return self.model_size + 1

def neglog_prior(self):
R_sigma = log(self.max_sigma) - log(self.quantum)
model_squared = dot(self.model, self.model)

return ( log(R_sigma) + log(self.sigma)
+ neglog_gaussian_likelihood(self.model_size, self.sigma / (self.m**0.5),
model_squared, 1.0) )

def neglog_fisher(self):
return ( - log(2*(self.model_size+self.data_size))
- log(linear_algebra.determinant(self.left_hand_side))
+ (self.model_size*2+2)*log(self.sigma) )

def total_neglog_data(self):
return neglog_gaussian_likelihood(self.data_size, self.sigma,
self.total_squared_residual, self.quantum)

def neglog_data(self):
return individual_neglog_gaussian_likelihoods(
self.sigma, self.squared_residual, self.quantum)

class Hidden_factor_estimate(Estimate):
""" Model of multivariate data as having a single hidden factor,
as described in the paper "Single Factor Analysis by MML Estimation"
by C.S. Wallace and P.R. Freeman.

data : NxK-array of floats
The data to be encoded.

Required only if an explicit length is needed:

mean_range : Kx2-array of Float
Anticipated minimum and maximum values for the mean.

quantum : K-array of Float
Measurement accuracy.

max_sigma : K-array of Float
The maximum anticipated standard deviation of the residual after removal of
the mean and hidden factor. A safe choice for this would be the standard
deviation of the data.

Note: Length includes 1 bit to describe whether there is a hidden factor or not.
"""

parameters = [ "mean", "sigma", "has_factor", "factor_loads", "factor_scores" ]

accuracy = 1e-4 # Allowable relative error in factor_loads

def __init__(self, data, mean_range=None, quantum=None, max_sigma=None, weighting=None):
Estimate.__init__(self, data, weighting)

self.K = data.shape[1]

weighted_data = transpose(transpose(data)*self.weighting)
self.mean = sum(weighted_data) / self.data_size

deviation = data - self.mean

data_outerproduct = sum( self.weighting[:,NewAxis,NewAxis]
* deviation[:,:,NewAxis] * deviation[:,NewAxis,:] )

sum_squares = diagonal(data_outerproduct)
root_sum_squares = sqrt(sum_squares)
correlation = data_outerproduct / outerproduct(root_sum_squares,root_sum_squares)

# NOTE: inefficient, only need largest
eigenvalues, eigenvectors = linear_algebra.eigenvectors(correlation)

largest = argmax(eigenvalues)

b = eigenvectors[largest]
b = b * sqrt(eigenvalues[largest] / dot(b,b))

def length(b): # (omits constant terms)
bb = b * b
b2 = sum(bb)
N_1 = self.data_size - 1.0

sigma = sqrt(sum_squares / ((bb+1.0)*(self.data_size-1.0)))

v_scale = (1.0-self.K/((self.data_size-1.0)*b2)) / (1+b2)

#w = deviation / sigma
#v = dot(w, b) * v_scale
#v2 = dot(v,v)
#residual = w - b[NewAxis,:]*v[:,NewAxis]
# ... however we can calculate what we need using just the covariance matrix:
Y = data_outerproduct / outerproduct(sigma,sigma)
Ybb = sum(sum(Y * outerproduct(b,b)))
wbv = Ybb * v_scale
v2 = wbv * v_scale
residual2 = (b2+1.0)*N_1 - 2*wbv + b2*v2

return N_1*sum(log(sigma)) \
+ 0.5*self.K*log(max(1e-30, self.data_size*v2)) \
+ 0.5*(N_1+self.K)*log(1+b2) \
+ 0.5*v2 \
+ 0.5*residual2

from scipy.optimize import fmin
b = fmin(length, b, xtol=self.accuracy, ftol=1e-10, disp=0)

b2 = dot(b,b)
if b2 * (1-self.accuracy*2)**2 * (self.data_size-1.0) <= self.K:
b[:] = 0.0
b2 = 0.0

self.sigma = sqrt(sum_squares / ((b*b+1.0)*(self.data_size-1.0)))

#Alternative calculation method: the iteration described in Wallace&Freeman
#while 1:
#    bb = b * b
#    b2 = sum(bb)
#    if b2 * (self.data_size-1.0) < self.K:
#        b[:] = 0.0
#        b2 = 0.0
#
#    self.sigma = sqrt(sum_squares / ((bb+1.0)*(self.data_size-1.0)))
#
#    if b2 == 0.0: break
#
#    Y = data_outerproduct / outerproduct(self.sigma,self.sigma)
#
#    old_b = b
#
#    b = ( dot(Y,b)
#          * ( (1.0-self.K/((self.data_size-1.0)*b2))
#              / ((self.data_size-1.0)*(1.0+b2)) )    )
#
#    difference = b-old_b
#    if dot(difference,difference) < self.sufficiently_close**2:
#        break

# Irrelevant, but nicer
if sum(b) < 0.0: b *= -1.0

self.has_factor = (b2 > 0.0)
if self.has_factor:
self.factor_scores = dot(deviation/self.sigma,b) \
* ( (1.0-self.K/((self.data_size-1.0)*b2)) / (1+b2) )
else:
self.parameters = self.parameters[:4]
self.factor_scores = zeros(self.data.shape[0], Float)

if quantum == None or max_sigma == None or mean_range == None:
self.has_prior = False
else:
self.quantum = quantum
self.max_sigma = max_sigma
self.mean_range = mean_range

for i in xrange(self.K):
if not mean_range[i,0] < self.mean[i] < mean_range[i,1] or \
not quantum[i] < self.sigma[i] < max_sigma[i]:
self.violates_prior = True

def dimensions(self):
if not self.has_factor:
return self.K * 2

return self.K * 3 + self.data_size

def neglog_prior(self):
neglog_hasfactor_prior = -log(0.5)

neglog_mean_prior = sum(log(self.mean_range[:,1] - self.mean_range[:,0]))
neglog_sigma_prior = ( sum(log(log(self.max_sigma)-log(self.quantum)))
+ sum(log(self.sigma)) )

if not self.has_factor:
return ( neglog_hasfactor_prior + neglog_mean_prior + neglog_sigma_prior )

def B(K):
if K == 1: return pi/2
elif K == 2: return pi
else: return 2.0*pi*B(K-2)/(K-1.0)

+ (self.K-1)/2.0 * log(1+dot(b,b)) )

sum_square_factor_scores = sum(self.factor_scores*self.factor_scores*self.weighting)
neglog_factor_score_prior = neglog_gaussian_likelihood(
self.data_size,1.0, sum_square_factor_scores, 1.0)

return ( neglog_hasfactor_prior + neglog_mean_prior + neglog_sigma_prior +

def neglog_fisher(self):
if not self.has_factor:
return self.K*(-log(2)-2*log(self.data_size)) + 4*sum(log(self.sigma))

weighted_scores = self.weighting * self.factor_scores
sum_square_factor_scores = dot(weighted_scores,self.factor_scores)
sum_factor_scores = sum(weighted_scores)

return ( -self.K * log(2.0*self.data_size)
-self.K * log(self.data_size*sum_square_factor_scores - sum_factor_scores)
-(self.data_size-2.0) * log(1 + dot(b,b))
+6.0*sum(log(self.sigma)) )

def neglog_data(self):
result = zeros(len(self.data), Float)

for i in xrange(self.K):
column = ( self.data[:,i]
- self.mean[i]
result += individual_neglog_gaussian_likelihoods(
self.sigma[i], column*column, self.quantum[i] )

return result

if __name__ == "__main__":
# Run tests on the various estimators if not "import"ed.

if 1:
data = random_array.normal(10.0, 1.0, 100)
model = Gaussian_estimate(data, (-100.0,100.0), 0.1, 0.5)
print model
model.test()
print

model = Gaussian_estimate(data, (-100.0,100.0), 0.1, 1000.0)
print model
model.test()
print

if 1:
data = random_array.random_integers(4,0, 100)
model = Discrete_estimate(data, 5)
print model
model.test()
print

model = Discrete_estimate(data, 5, array([0.25,1.0,1.0,1.0,10.0]))
print model
model.test()
print

if 1:
data = random_array.normal(5.0, 1.0, (10000, 5))
u0 = zeros(5, Float)
V = identity(5, Float)
model = Multivariate_gaussian_estimate(data, 1.0, 6.0, V, 1.0, u0)
print model
model.test()
print

if 1:
given_data = random_array.normal(0.0, 1.0, (100,2))
data = given_data[:,0] + given_data[:,1] + random_array.normal(0.0, 1.0, 100)
model = Regression_estimate(data, given_data, 1.0, 0.1, 10.0)
print model
model.test()
print

if 1:
mean_range = array([[-10.0, 10.0]]*5)
quantum = array([0.01]*5)
max_sigma = array([10.0]*5)

data = random_array.normal(5.0, 0.1, (1000, 5))
model = Hidden_factor_estimate(data, mean_range, quantum, max_sigma)
print model

total = 0.0
for i in xrange(5):
temp = Gaussian_estimate(data[:,i], (-10.0,10.0), 0.01, 10.0)
total += temp.length()
print "As 5 independent Gaussians would yield length %.2f" % (total)

model.test()
print
data += outerproduct(random_array.normal(0.0, 1.0, 1000), [1.0,2.0,0.0,0.0,-0.5])
model = Hidden_factor_estimate(data, mean_range, quantum, max_sigma)
print model
total = 0.0

for i in xrange(5):
temp = Gaussian_estimate(data[:,i], (-10.0,10.0), 0.01, 10.0)
total += temp.length()
print "As 5 independent Gaussians would yield length %.2f" % (total)

model.test()
print

```