# Copyright (C) 2004 Paul Harrison
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
""" MML estimators. """
from numarray import *
from numarray import random_array, linear_algebra, arrayprint
import sys
euler_constant = 0.577215664901532860606512090082402431042
def dimension_term(D):
""" Approximation of the dimension-dependent terms in MML87.
D : integer
number of dimensions
Error is of order 0.3 nit.
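        This is the constant part of the MML87 message length; Estimate.length()
        below combines it with the other terms as
            neglog_prior() - 0.5*neglog_fisher() + total_neglog_data()
            + dimension_term(dimensions())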
"""
return -0.5*D*log(2*pi) + 0.5*log(D*pi) - euler_constant
def neglog_gaussian_likelihood(N, sigma, sum_of_squares, quantum=1.0):
""" Return the total negative-log-likelihood of some data sampled from a Gaussian
source.
N : float
number of items
sigma : float
standard deviation of the source
sum_of_squares : float
sum of squares of the data
quantum : float
measurement accuracy
Note: This likelihood is a slight approximation,
as the Gaussian distribution is not discrete.
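        Consistency sketch (illustrative data; the result should agree, up to
        floating point error, with summing the per-datum version defined below):
            data = random_array.normal(0.0, 1.0, 100)
            total = neglog_gaussian_likelihood(100, 1.0, sum(data*data), 0.01)
            per_datum = individual_neglog_gaussian_likelihoods(1.0, data*data, 0.01)
            # total == sum(per_datum)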
"""
return N*(-log(quantum)+0.5*log(2*pi)+log(sigma)) + sum_of_squares/(2*sigma*sigma)
def individual_neglog_gaussian_likelihoods(sigma, squares, quantum=1.0):
""" Return the negative-log-likelihood of some data sampled from a Gaussian
source.
sigma : float
standard deviation of the source
squares : N-array of float
            squares of the individual data values
quantum : float
measurement accuracy
Note: This likelihood is a slight approximation,
as the Gaussian distribution is not discrete.
"""
return squares/(2*sigma*sigma) + (-log(quantum)+0.5*log(2*pi)+log(sigma))
def neglog_multivariate_gaussian_likelihood(N, K, concentration, total_data_outerproduct,
quantum):
""" Multivariate equivalent of neglog_gaussian_likelihood
N : float
number of items
        K : integer
size of data vectors
concentration : KxK array of floats
inverse of the covariance matrix
total_data_outerproduct : KxK array of floats
total outerproduct of the data
quantum : float
measurement accuracy (volume)
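        Sketch of forming the arguments from an NxK array "data" with a known
        "mean" and "concentration" (names here are illustrative):
            deviation = data - mean
            total_outer = sum(deviation[:,NewAxis,:] * deviation[:,:,NewAxis])
            nll = neglog_multivariate_gaussian_likelihood(
                len(data), data.shape[1], concentration, total_outer, quantum)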
"""
    # Note: sum(ravel(total_data_outerproduct*concentration))
    # equals the sum over all data points of dot(dot(datum, concentration), datum)
return (
N*(
- log(quantum)
- 0.5*log(linear_algebra.determinant(concentration))
+ 0.5*K*log(2*pi)
)
+ 0.5 * sum(ravel(total_data_outerproduct * concentration))
)
def individual_neglog_multivariate_gaussian_likelihoods(
K, concentration, data_outerproduct, quantum):
""" Multivariate equivalent of individual_neglog_gaussian_likelihoods
        K : integer
size of data vectors
concentration : KxK array of floats
inverse of the covariance matrix
data_outerproduct : NxKxK array of floats
outerproduct of each datum
quantum : float
measurement accuracy (volume)
"""
return (0.5*sum(reshape(data_outerproduct * concentration, (-1,K*K)),1)
+ (- log(quantum)
- 0.5*log(linear_algebra.determinant(concentration))
+ 0.5*K*log(2*pi)) )
class Estimate:
""" Abstract base class for MML87 estimators.
Fields:
data :
The input data.
weighting : array of floats
            If the estimate is being used as part of a mixture model, the level of
assignment of each data point to the class this estimate is for.
Otherwise, an array of ones.
data_size :
Total of "weighting", i.e. normally just the len(data).
violates_prior : boolean
Do any model parameters exceed bounds on the given prior?
Sub-classes should define:
def dimensions(self) : number of dimensions in the model
def neglog_prior(self) : negative log of the prior density
def neglog_fisher(self) : negative log of the determinant of the Fisher matrix
def neglog_data(self) : negative log likelihood of each datum
(see documentation)
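        A sub-class skeleton (hypothetical and purely illustrative, bodies elided;
        the real sub-classes below are fuller examples):
            class My_estimate(Estimate):
                parameters = [ "theta" ]
                def __init__(self, data, weighting=None):
                    Estimate.__init__(self, data, weighting)
                    self.theta = ...  # point estimate of the model parameter
                def dimensions(self): return 1
                def neglog_prior(self): return ...   # -log prior density at theta
                def neglog_fisher(self): return ...  # -log det Fisher matrix
                def neglog_data(self): return ...    # per-datum -log likelihoods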
"""
parameters = [ ] # Names of parameters in the estimate
def __init__(self, data, weighting=None):
self.data = data
        if weighting is not None:
self.weighting = weighting
else:
self.weighting = ones(len(data), Float)
self.data_size = sum(self.weighting)
self.has_prior = True
self.violates_prior = False
def __repr__(self):
""" Display the estimate's model, length, and whether it violates the prior. """
result = self.__class__.__name__ + ":"
if self.has_prior:
result += " %.2f nits" % self.length()
if self.violates_prior:
result += "\n*** Prior bounds violated! ***"
for item in self.parameters:
result += "\n%15s=" % item
data = getattr(self,item)
if isinstance(data,float):
result += "%.2f" % data
elif type(data) != type(array([])):
result += repr(data)
elif data.shape[0] > 100:
result += "< a large array of numbers >"
else:
result += arrayprint.array2string(data, precision=2, suppress_small=True) \
.replace("\n", "\n"+" "*16)
return result
def total_neglog_data(self):
return sum(self.neglog_data() * self.weighting)
def length(self):
""" Calculate the message length. """
assert self.has_prior
return self.neglog_prior() \
- 0.5*self.neglog_fisher() \
+ self.total_neglog_data() \
+ dimension_term(self.dimensions())
def test(self):
""" Simple numerical self-consistency test:
Is there any single parameter that, if altered slightly, reduces the length?
If so, the estimate is not consistent with the length calculation.
Also tests that total_neglog_data and neglog_data are consistent.
Limitation: Does not detect the length calculation being out by a constant.
"""
base_length = self.length()
any_bad = False
print "Testing"
discrepancy = abs(self.total_neglog_data() - Estimate.total_neglog_data(self))
if discrepancy > 1e-6:
print "Inconsistency between total_neglog_data and neglog_data:", discrepancy
any_bad = True
def get(position):
if shape == ():
return getattr(self,parameter)
else:
return data[position]
def set(position, value):
if shape == ():
setattr(self,parameter,value)
else:
data[position] = value
originals = { }
for parameter in self.parameters:
data = getattr(self,parameter)
if isinstance(data,float):
originals[parameter] = data
elif type(data) == type(array([])):
originals[parameter] = data.copy()
for parameter in originals:
data = getattr(self,parameter)
if isinstance(data,float):
shape = ()
elif type(data) != type(array([])):
continue
else:
shape = data.shape
positions = [ () ]
for item in shape:
new_positions = [ ]
for i in xrange(item):
new_positions.extend([ j+(i,) for j in positions ])
positions = new_positions
for position in positions:
for factor in [ 0.5, 0.9, 0.99, 1.01, 1.1, 2.0 ]:
data = getattr(self,parameter)
original = get(position)
set(position, original * factor)
self._test_prep()
l2 = self.length()
for parameter1, value in originals.items():
if type(value) == type(array([])):
value = value.copy()
setattr(self,parameter1,value)
if l2-base_length < -1e-6:
print "Length decreased: ", parameter, position, factor, l2-base_length
any_bad = True
if any_bad:
print "-- FAILED!"
else:
print "-- OK"
return not any_bad
def _test_prep(self): pass
# ^ This can be used to update any intermediate results if parameters change,
# allowing test() to properly test an estimator
class Gaussian_estimate(Estimate):
""" Gaussian distribution estimate.
        (Identical to the classical estimate, but also gives a message length.)
data : array of float
The data.
Required only if an explicit length is needed:
mean_range : (float, float)
The minimum and maximum possible values for the mean.
quantum : float
The measurement accuracy.
Also, the minimum possible sigma.
max_sigma : float
The maximum possible sigma.
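        Example (illustrative values, as in the self-test at the bottom of this
        module):
            data = random_array.normal(0.0, 1.0, 100)
            est = Gaussian_estimate(data, (-10.0,10.0), 0.01, 10.0)
            print est.mean, est.sigma
            print est.length()      # message length in nits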
"""
parameters = [ 'mean', 'sigma' ]
def __init__(self, data,
mean_range=None, quantum=None, max_sigma=None,
weighting=None):
Estimate.__init__(self, data, weighting)
self.mean = sum(self.data*self.weighting) / self.data_size
deviation = data - self.mean
self.squared_deviation = deviation*deviation
self.sum_squared_deviation = sum(self.squared_deviation*self.weighting)
self.sigma = (self.sum_squared_deviation / (self.data_size-1.0)) ** 0.5
        if mean_range is None or quantum is None or max_sigma is None:
self.has_prior = False
else:
self.has_prior = True
self.quantum = quantum
self.mean_range = mean_range
self.max_sigma = max_sigma
self.violates_prior = \
self.mean < self.mean_range[0] or \
self.mean > self.mean_range[1] or \
self.sigma < self.quantum or \
self.sigma > self.max_sigma
def dimensions(self):
# mean, sigma
return 2
def neglog_prior(self):
R_mean = self.mean_range[1] - self.mean_range[0]
R_sigma = log(self.max_sigma) - log(self.quantum)
return log(R_mean) + log(R_sigma) + log(self.sigma)
def neglog_fisher(self):
return -log(2) - 2*log(self.data_size) + 4*log(self.sigma)
def total_neglog_data(self):
return neglog_gaussian_likelihood(self.data_size, self.sigma,
self.sum_squared_deviation, self.quantum)
def neglog_data(self):
return individual_neglog_gaussian_likelihoods(
self.sigma, self.squared_deviation, self.quantum)
class Discrete_estimate(Estimate):
""" Discrete estimate.
        Given a list of items, estimate the probability of each type of item.
        Based on the multinomial estimate described in the book; however, the order
        of items is also encoded in the message.
data : array of integers
            Type of each item (0 <= item < n_types).
n_types : integer
Number of types of item.
Optional:
alpha : n_types-array of float
Conjugate prior, as though alpha[i] of each type i had already been observed.
alpha[i] = 1.0 is a uniform prior, and the default.
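        Example (illustrative):
            data = array([0, 2, 2, 1, 0, 2])
            est = Discrete_estimate(data, 3)
            print est.probability     # estimated probability of each type
            print est.length()        # message length in nits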
"""
parameters = [ "probability" ]
def __init__(self, data, n_types, alpha=None, weighting=None):
Estimate.__init__(self, data, weighting)
self.n_types = n_types
        self.conjugate_prior = (alpha is not None)
if self.conjugate_prior:
self.alpha = alpha
else:
self.alpha = ones(n_types, Float)
sum_alpha = sum(self.alpha)
self.counts = zeros(n_types, Float)
for i in xrange(len(data)):
self.counts[data[i]] += self.weighting[i]
self.probability = ( (self.counts+self.alpha-0.5)
/ (self.data_size+sum_alpha-0.5*n_types) )
def _test_prep(self):
# Used by Estimate.test()
self.probability /= sum(self.probability)
def dimensions(self):
# self.probability must add to 1.0, so the model-space has n-1 dimensions.
return self.n_types-1
def neglog_prior(self):
        # Import scipy lazily: it is only needed when a conjugate prior is used.
if self.conjugate_prior:
from scipy.special import gammaln
return ( -sum(log(self.probability) * (self.alpha-1.0))
-gammaln(sum(self.alpha))
+sum(gammaln(self.alpha)) )
else:
return -sum(log(arange(1,self.n_types)))
def neglog_fisher(self):
return ( -(self.n_types-1.0) * log(self.data_size)
+sum(log(self.probability)) )
def total_neglog_data(self):
return -sum(self.counts * log(self.probability))
def neglog_data(self):
return take(-log(self.probability), self.data)
class Multivariate_gaussian_estimate(Estimate):
""" Multivariate Gaussian distribution estimate, with a conjugate prior.
data : NxK-array of floats
The data to be encoded.
m : float, > K, default K+1.0
Number of "samples" in the prior for the covariance.
V : KxK-array of floats, default quantum**(2/K)*identity(K)*m
Total covariance matrix of the "samples", defining the prior on the covariance.
Note that this is the total, sum x_i*x_j, not (sum x_i*x_j) / (m-1)
m1 : float, > 0.0, default 1.0
Number of "samples" in the prior for the mean.
        u0 : K-array of floats, default zeros(K)
Mean of the "samples" defining the prior on the mean.
Required if an explicit length is needed:
quantum : float
*Volume* of the measurement accuracy of the data.
Note: This class requires SciPy (for gammaln function)
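        Example (illustrative; the prior is built from quantum by default):
            data = random_array.normal(0.0, 1.0, (200, 3))
            est = Multivariate_gaussian_estimate(data, quantum=0.001)
            print est.mean
            print est.covariance
            print est.length()    # requires SciPy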
"""
parameters = [ "mean", "covariance" ]
def __init__(self, data, quantum=None, m=None,V=None,m1=1.0,u0=None, weighting=None):
assert quantum is not None or (V is not None and u0 is not None)
Estimate.__init__(self, data, weighting)
self.K = data.shape[1]
if m is None:
m = self.K + 1.0
if V is None:
V = identity(self.K) * (quantum**(2.0/self.K)) * m
if u0 is None:
u0 = zeros(self.K, Float)
self.m = m
self.V = V
self.m1 = m1
self.u0 = u0
weighted_data = data * self.weighting[:,NewAxis]
self.mean = (m1*u0 + sum(weighted_data)) / (m1 + self.data_size)
deviation = data - self.mean
u0_deviation = u0 - self.mean
self.data_outerproduct = deviation[:,NewAxis,:] * deviation[:,:,NewAxis]
self.total_data_outerproduct = sum(self.weighting[:,NewAxis,NewAxis] * self.data_outerproduct)
total_covariance = m1*outerproduct(u0_deviation, u0_deviation) \
+ V + self.total_data_outerproduct
self.covariance = total_covariance / (self.data_size+m+self.K-2)
if quantum is None:
self.has_prior = False
else:
self.quantum = quantum
self.det_V = linear_algebra.determinant(self.V)
self.concentration = linear_algebra.inverse(self.covariance)
self.det_concentration = linear_algebra.determinant(self.concentration)
def _test_prep(self):
# Used by Estimate.test()
self.concentration = linear_algebra.inverse(self.covariance)
self.det_concentration = linear_algebra.determinant(self.concentration)
        deviation = self.data - self.mean
self.data_outerproduct = deviation[:,NewAxis,:] * deviation[:,:,NewAxis]
self.total_data_outerproduct = sum(self.weighting[:,NewAxis,NewAxis] * self.data_outerproduct)
def dimensions(self):
# K parameters for the mean,
# K*(K+1)/2 for the covariance (covariance matrix is symmetric)
return self.K + self.K*(self.K+1)/2
def neglog_prior(self):
from scipy.special import gammaln
h0_prior = log(self.det_concentration)
log_gamma_sum = 0.0
for i in xrange(1,self.K+1):
log_gamma_sum += gammaln( (self.m-i)/2.0 )
concentration_prior = (
log(pi) * self.K*(self.K-1)/4.0
- log(2) * self.K*(self.m-1)/2.0
+ log_gamma_sum
- log(self.det_V) * (self.m-self.K-2)/2.0
- log(self.det_concentration) * (self.m-1)/2.0
+ 0.5 * trace(dot(self.concentration, self.V))
) # (Wishart distribution)
u0_deviation = self.u0 - self.mean
mean_prior = neglog_multivariate_gaussian_likelihood(
1.0, self.K, self.m1*self.concentration,
outerproduct(u0_deviation, u0_deviation), 1.0 )
        # Was previously (incorrectly) using u0 itself rather than its deviation:
#mean_prior = neglog_multivariate_gaussian_likelihood(
# 1.0, self.K, self.m1*self.concentration,
# outerproduct(self.u0, self.u0), 1.0 )
return h0_prior + concentration_prior + mean_prior
def neglog_fisher(self):
return - (self.K + self.K*(self.K+1)/2) * log(self.data_size) \
+ self.K*log(2.0) + self.K*log(self.det_concentration)
def total_neglog_data(self):
return neglog_multivariate_gaussian_likelihood(
self.data_size, self.K, self.concentration,
self.total_data_outerproduct, self.quantum)
def neglog_data(self):
return individual_neglog_multivariate_gaussian_likelihoods(
self.K, self.concentration, self.data_outerproduct, self.quantum)
class Regression_estimate(Estimate):
""" Regression with a Gaussian prior for the model weights.
data : N-array of floats
The data to be encoded.
given_data : NxK-array of floats
Some data known by both sender and receiver,
which may be used to help transmit the data efficiently.
Each column should be normalized to have standard deviation of one and a mean
        of zero. If a constant term is required (i.e. if the data has non-zero mean),
include a column consisting entirely of ones.
m : float
The expected ratio of the variance of model weights to the variance of the
residual. m=1 would be a neutral choice.
Required only if an explicit length is needed:
quantum : float
The measurement accuracy of the data.
max_sigma : float
The maximum expected standard deviation of the residual.
A safe choice for this would be the standard deviation of the data.
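        Example (illustrative; given_data columns already have approximately zero
        mean and unit standard deviation):
            given_data = random_array.normal(0.0, 1.0, (100,2))
            data = given_data[:,0] + given_data[:,1] + random_array.normal(0.0, 1.0, 100)
            est = Regression_estimate(data, given_data, 1.0, 0.1, 10.0)
            print est.model, est.sigma
            print est.length()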
"""
parameters = [ "sigma", "model" ]
def __init__(self, data, given_data,
m=1.0, quantum=None, max_sigma=None,
weighting=None):
Estimate.__init__(self, data, weighting)
self.m = m
        self.model_size = given_data.shape[1]
        self.given_data = given_data
transposed_weighted_given_data = transpose(given_data) * self.weighting
self.left_hand_side = dot(transposed_weighted_given_data, given_data) + \
m * identity(self.model_size)
right_hand_side = dot(transposed_weighted_given_data, data)
self.model = \
linear_algebra.solve_linear_equations(self.left_hand_side, right_hand_side)
residual = data - dot(given_data, self.model)
self.squared_residual = residual*residual
self.total_squared_residual = sum(self.squared_residual*self.weighting)
model_squared = dot(self.model, self.model)
self.sigma = ((self.total_squared_residual+m*model_squared)
/ self.data_size) ** 0.5
        if quantum is None or max_sigma is None:
self.has_prior = False
else:
self.quantum = quantum
self.max_sigma = max_sigma
self.violates_prior = self.sigma < self.quantum or \
self.sigma > self.max_sigma
def _test_prep(self):
# used by Estimate.test()
        residual = self.data - dot(self.given_data, self.model)
self.squared_residual = residual*residual
self.total_squared_residual = sum(self.squared_residual*self.weighting)
def dimensions(self):
return self.model_size + 1
def neglog_prior(self):
R_sigma = log(self.max_sigma) - log(self.quantum)
model_squared = dot(self.model, self.model)
return ( log(R_sigma) + log(self.sigma)
+ neglog_gaussian_likelihood(self.model_size, self.sigma / (self.m**0.5),
model_squared, 1.0) )
def neglog_fisher(self):
return ( - log(2*(self.model_size+self.data_size))
- log(linear_algebra.determinant(self.left_hand_side))
+ (self.model_size*2+2)*log(self.sigma) )
def total_neglog_data(self):
return neglog_gaussian_likelihood(self.data_size, self.sigma,
self.total_squared_residual, self.quantum)
def neglog_data(self):
return individual_neglog_gaussian_likelihoods(
self.sigma, self.squared_residual, self.quantum)
class Hidden_factor_estimate(Estimate):
""" Model of multivariate data as having a single hidden factor,
as described in the paper "Single Factor Analysis by MML Estimation"
by C.S. Wallace and P.R. Freeman.
data : NxK-array of floats
The data to be encoded.
Required only if an explicit length is needed:
mean_range : Kx2-array of Float
Anticipated minimum and maximum values for the mean.
quantum : K-array of Float
Measurement accuracy.
max_sigma : K-array of Float
The maximum anticipated standard deviation of the residual after removal of
the mean and hidden factor. A safe choice for this would be the standard
deviation of the data.
Note: Length includes 1 bit to describe whether there is a hidden factor or not.
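        Example (illustrative, mirroring the self-test at the bottom of this module;
        construction requires SciPy for the internal optimisation):
            mean_range = array([[-10.0, 10.0]]*3)
            quantum = array([0.01]*3)
            max_sigma = array([10.0]*3)
            data = random_array.normal(0.0, 1.0, (500, 3))
            est = Hidden_factor_estimate(data, mean_range, quantum, max_sigma)
            print est.has_factor, est.factor_loads
            print est.length()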
"""
parameters = [ "mean", "sigma", "has_factor", "factor_loads", "factor_scores" ]
accuracy = 1e-4 # Allowable relative error in factor_loads
def __init__(self, data, mean_range=None, quantum=None, max_sigma=None, weighting=None):
Estimate.__init__(self, data, weighting)
self.K = data.shape[1]
weighted_data = transpose(transpose(data)*self.weighting)
self.mean = sum(weighted_data) / self.data_size
deviation = data - self.mean
data_outerproduct = sum( self.weighting[:,NewAxis,NewAxis]
* deviation[:,:,NewAxis] * deviation[:,NewAxis,:] )
sum_squares = diagonal(data_outerproduct)
root_sum_squares = sqrt(sum_squares)
correlation = data_outerproduct / outerproduct(root_sum_squares,root_sum_squares)
# NOTE: inefficient, only need largest
eigenvalues, eigenvectors = linear_algebra.eigenvectors(correlation)
largest = argmax(eigenvalues)
b = eigenvectors[largest]
b = b * sqrt(eigenvalues[largest] / dot(b,b))
def length(b): # (omits constant terms)
bb = b * b
b2 = sum(bb)
N_1 = self.data_size - 1.0
sigma = sqrt(sum_squares / ((bb+1.0)*(self.data_size-1.0)))
v_scale = (1.0-self.K/((self.data_size-1.0)*b2)) / (1+b2)
#w = deviation / sigma
#v = dot(w, b) * v_scale
#v2 = dot(v,v)
#residual = w - b[NewAxis,:]*v[:,NewAxis]
# ... however we can calculate what we need using just the covariance matrix:
Y = data_outerproduct / outerproduct(sigma,sigma)
Ybb = sum(sum(Y * outerproduct(b,b)))
wbv = Ybb * v_scale
v2 = wbv * v_scale
residual2 = (b2+1.0)*N_1 - 2*wbv + b2*v2
return N_1*sum(log(sigma)) \
+ 0.5*self.K*log(max(1e-30, self.data_size*v2)) \
+ 0.5*(N_1+self.K)*log(1+b2) \
+ 0.5*v2 \
+ 0.5*residual2
from scipy.optimize import fmin
b = fmin(length, b, xtol=self.accuracy, ftol=1e-10, disp=0)
b2 = dot(b,b)
if b2 * (1-self.accuracy*2)**2 * (self.data_size-1.0) <= self.K:
b[:] = 0.0
b2 = 0.0
self.sigma = sqrt(sum_squares / ((b*b+1.0)*(self.data_size-1.0)))
#Alternative calculation method: the iteration described in Wallace&Freeman
#while 1:
# bb = b * b
# b2 = sum(bb)
# if b2 * (self.data_size-1.0) < self.K:
# b[:] = 0.0
# b2 = 0.0
#
# self.sigma = sqrt(sum_squares / ((bb+1.0)*(self.data_size-1.0)))
#
# if b2 == 0.0: break
#
# Y = data_outerproduct / outerproduct(self.sigma,self.sigma)
#
# old_b = b
#
# b = ( dot(Y,b)
# * ( (1.0-self.K/((self.data_size-1.0)*b2))
# / ((self.data_size-1.0)*(1.0+b2)) ) )
#
# difference = b-old_b
# if dot(difference,difference) < self.sufficiently_close**2:
# break
        # The overall sign of b is arbitrary; flip it for consistency (irrelevant, but nicer).
if sum(b) < 0.0: b *= -1.0
self.factor_loads = b * self.sigma
self.has_factor = (b2 > 0.0)
if self.has_factor:
self.factor_scores = dot(deviation/self.sigma,b) \
* ( (1.0-self.K/((self.data_size-1.0)*b2)) / (1+b2) )
else:
self.parameters = self.parameters[:4]
self.factor_scores = zeros(self.data.shape[0], Float)
        if quantum is None or max_sigma is None or mean_range is None:
self.has_prior = False
else:
self.quantum = quantum
self.max_sigma = max_sigma
self.mean_range = mean_range
for i in xrange(self.K):
if not mean_range[i,0] < self.mean[i] < mean_range[i,1] or \
not quantum[i] < self.sigma[i] < max_sigma[i]:
self.violates_prior = True
def dimensions(self):
if not self.has_factor:
return self.K * 2
# mean, sigma, factor_loads, factor_scores
return self.K * 3 + self.data_size
def neglog_prior(self):
neglog_hasfactor_prior = -log(0.5)
neglog_mean_prior = sum(log(self.mean_range[:,1] - self.mean_range[:,0]))
neglog_sigma_prior = ( sum(log(log(self.max_sigma)-log(self.quantum)))
+ sum(log(self.sigma)) )
if not self.has_factor:
return ( neglog_hasfactor_prior + neglog_mean_prior + neglog_sigma_prior )
def B(K):
if K == 1: return pi/2
elif K == 2: return pi
else: return 2.0*pi*B(K-2)/(K-1.0)
b = self.factor_loads / self.sigma
neglog_factor_loading_prior = ( log(B(self.K))
+ (self.K-1)/2.0 * log(1+dot(b,b)) )
sum_square_factor_scores = sum(self.factor_scores*self.factor_scores*self.weighting)
neglog_factor_score_prior = neglog_gaussian_likelihood(
self.data_size,1.0, sum_square_factor_scores, 1.0)
return ( neglog_hasfactor_prior + neglog_mean_prior + neglog_sigma_prior +
neglog_factor_loading_prior + neglog_factor_score_prior )
def neglog_fisher(self):
if not self.has_factor:
return self.K*(-log(2)-2*log(self.data_size)) + 4*sum(log(self.sigma))
weighted_scores = self.weighting * self.factor_scores
sum_square_factor_scores = dot(weighted_scores,self.factor_scores)
sum_factor_scores = sum(weighted_scores)
b = self.factor_loads / self.sigma
return ( -self.K * log(2.0*self.data_size)
-self.K * log(self.data_size*sum_square_factor_scores - sum_factor_scores)
-(self.data_size-2.0) * log(1 + dot(b,b))
+6.0*sum(log(self.sigma)) )
def neglog_data(self):
result = zeros(len(self.data), Float)
for i in xrange(self.K):
column = ( self.data[:,i]
- self.mean[i]
- self.factor_loads[i]*self.factor_scores )
result += individual_neglog_gaussian_likelihoods(
self.sigma[i], column*column, self.quantum[i] )
return result
if __name__ == "__main__":
    # Run tests on the various estimators when run as a script rather than imported.
if 1:
data = random_array.normal(10.0, 1.0, 100)
model = Gaussian_estimate(data, (-100.0,100.0), 0.1, 0.5)
print model
model.test()
print
model = Gaussian_estimate(data, (-100.0,100.0), 0.1, 1000.0)
print model
model.test()
print
if 1:
data = random_array.random_integers(4,0, 100)
model = Discrete_estimate(data, 5)
print model
model.test()
print
model = Discrete_estimate(data, 5, array([0.25,1.0,1.0,1.0,10.0]))
print model
model.test()
print
if 1:
data = random_array.normal(5.0, 1.0, (10000, 5))
u0 = zeros(5, Float)
V = identity(5, Float)
model = Multivariate_gaussian_estimate(data, 1.0, 6.0, V, 1.0, u0)
print model
model.test()
print
if 1:
given_data = random_array.normal(0.0, 1.0, (100,2))
data = given_data[:,0] + given_data[:,1] + random_array.normal(0.0, 1.0, 100)
model = Regression_estimate(data, given_data, 1.0, 0.1, 10.0)
print model
model.test()
print
if 1:
mean_range = array([[-10.0, 10.0]]*5)
quantum = array([0.01]*5)
max_sigma = array([10.0]*5)
data = random_array.normal(5.0, 0.1, (1000, 5))
model = Hidden_factor_estimate(data, mean_range, quantum, max_sigma)
print model
total = 0.0
for i in xrange(5):
temp = Gaussian_estimate(data[:,i], (-10.0,10.0), 0.01, 10.0)
total += temp.length()
print "As 5 independent Gaussians would yield length %.2f" % (total)
model.test()
print
data += outerproduct(random_array.normal(0.0, 1.0, 1000), [1.0,2.0,0.0,0.0,-0.5])
model = Hidden_factor_estimate(data, mean_range, quantum, max_sigma)
print model
total = 0.0
for i in xrange(5):
temp = Gaussian_estimate(data[:,i], (-10.0,10.0), 0.01, 10.0)
total += temp.length()
print "As 5 independent Gaussians would yield length %.2f" % (total)
model.test()
print