From 4a0c97d81af3605291c904b87a68bfeb881b8889 Mon Sep 17 00:00:00 2001
From: Manuel Baum <mbaum@techfak.uni-bielefeld.de>
Date: Thu, 11 Jul 2013 18:56:27 +0200
Subject: [PATCH] fixed some bug in computation of acceptance probability in
 MetropolisHastings, now works with restrained value ranges too. Also changed
 sample_local in continuous node to direct sampling so there is no rejection
 sampling anymore

---
 examples/MCMC_Continuous.py                  | 21 +++---
 primo/reasoning/ContinuousNode.py            | 71 +++++++++++++++++---
 primo/reasoning/Evidence.py                  | 21 +++---
 primo/reasoning/MCMC.py                      | 15 +++--
 primo/reasoning/MarkovChainSampler.py        |  6 +-
 primo/reasoning/__init__.py                  |  2 +-
 primo/reasoning/density/LinearBeta.py        | 28 +++++++-
 primo/reasoning/density/LinearExponential.py | 14 +++-
 primo/reasoning/density/LinearGauss.py       | 15 ++++-
 9 files changed, 144 insertions(+), 49 deletions(-)

diff --git a/examples/MCMC_Continuous.py b/examples/MCMC_Continuous.py
index b419255..42da8a7 100644
--- a/examples/MCMC_Continuous.py
+++ b/examples/MCMC_Continuous.py
@@ -43,10 +43,12 @@ diameter_parameters=LinearBetaParameters(-10.0,{age:4.0},10.0,{age:-4.0})
 diameter.set_density_parameters(diameter_parameters)
 
 
-mcmc_ask=MCMC(bn,100000,convergence_test=ConvergenceTestSimpleCounting(50000))
+mcmc_ask=MCMC(bn,10000,convergence_test=ConvergenceTestSimpleCounting(5000))
 
 
 print "------PriorMarginal:------"
+
+
 pm=mcmc_ask.calculate_PriorMarginal([age],Gauss)
 print pm
 print "Ground truth: mu=0.5 C=[0.25]"
@@ -56,19 +58,20 @@ print pm
 print ""
 
 
-evidence={age:EvEqual(2)}
-
-
-#print "ProbabilityOfEvidence: " 
-#poe=mcmc_ask.calculate_PoE(evidence)
-#print poe
-
 print "------PosteriorMarginal:------"
-pm=mcmc_ask.calculate_PosteriorMarginal([age,height],evidence,Gauss)
+pm=mcmc_ask.calculate_PosteriorMarginal([age,height],{age:EvEqual(2)},Gauss)
 #pm=mcmc_ask.calculate_PosteriorMarginal([height],evidence,Gauss)
 print "P(age,height|age=2):"
 print pm
 print "Ground truth: age=2, height=mu:1.9,C=0.3"
+print ""
+
+pm=mcmc_ask.calculate_PosteriorMarginal([age,height],{age:EvLower(0.1)},Gauss)
+#pm=mcmc_ask.calculate_PosteriorMarginal([height],evidence,Gauss)
+print "P(age,height|age<0.1):"
+print pm
+print "Ground truth: age=0:0.1, height=mu:-0.1:0.0,C=0.3"
+print ""
 
 print "------PropabilityOfEvidence------"
 poe=mcmc_ask.calculate_PoE({age:EvLower(0.347)})
diff --git a/primo/reasoning/ContinuousNode.py b/primo/reasoning/ContinuousNode.py
index 10c6ce1..02a67af 100644
--- a/primo/reasoning/ContinuousNode.py
+++ b/primo/reasoning/ContinuousNode.py
@@ -2,6 +2,7 @@
 
 from primo.reasoning import RandomNode
 import random
+import scipy
 
 class ContinuousNode(RandomNode):
     '''
@@ -53,18 +54,44 @@ class ContinuousNode(RandomNode):
         reimplement it by constructing the integral over the normalvariate in the
         intervalls allowed by the evidence and then generate a sample directly.
         Currently this method has O(inf).'''
-        std_walk=0.1
-        v=random.normalvariate(x,std_walk)
+        std_walk=1.0
+        
+        #intersect possible evidence-interval with value_range:
         if self in evidence.keys():
-            while not (evidence[self].is_compatible(v) and self.value_range[0]<v and v<self.value_range[1]):
-                v=random.normalvariate(x,std_walk)
+            evidence_range=evidence[self].get_interval()
+            lower_limit=max(self.value_range[0],evidence_range[0])
+            upper_limit=min(self.value_range[1],evidence_range[1])
         else:
-            while not (self.value_range[0]<v and v<self.value_range[1]):
-                v=random.normalvariate(x,std_walk)
-        #print "REEEEEEEEEEEEEEEEEEEEEEEEEEEEEE"+self.name+" " +str(v)
-        return v
+            lower_limit=self.value_range[0]
+            upper_limit=self.value_range[1]
+        
+        if lower_limit==upper_limit:
+            v=lower_limit
+        if lower_limit>upper_limit:
+            raise Exception("Intersection of random variable's value_range and"
+                "allowed Interval for Evidence is empty - no sampling possible")
+        
+        #v=random.normalvariate(x,std_walk)
+        #while not (lower_limit<=v and v<=upper_limit):
+        #    v=random.normalvariate(x,std_walk)
+        
+        #generate the actual sample
+        distribution=scipy.stats.norm(x, std_walk)
+        lower_cdf=distribution.cdf(lower_limit)
+        upper_cdf=distribution.cdf(upper_limit)
+        
+        sample_in_integral=random.uniform(lower_cdf, upper_cdf)
+        
+        sample=distribution.ppf(sample_in_integral)
         
-    def sample_global(self, state):
+        
+        
+        a=scipy.stats.norm(self.value_range[0], std_walk).cdf(x)
+        b=scipy.stats.norm(self.value_range[0], std_walk).cdf(sample)
+        cdf_ratio = a/b 
+        return sample,cdf_ratio
+        
+    def sample_global(self, state, evidence):
         '''
         This method can be used to sample from this local distribution.
         
@@ -72,7 +99,31 @@ class ContinuousNode(RandomNode):
             values of this nodes parents in this dict and the conditional 
             probability density will be adjusted accordingly.
         '''
-        proposal=self.cpd.sample_global(state)
+        #is there some evidence for this node?
+        if self in evidence.keys():
+            #if only one value is allowed we can return it immediatly
+            unique=evidence[self].get_unique_value()
+            if unique!=None:
+                return unique
+            #if a whole interval is allowed intersect it with this variable's
+            #value_range to get limits for sampling
+            else:
+                evidence_range=evidence[self].get_interval()
+                lower_limit=max(self.value_range[0],evidence_range[0])
+                upper_limit=min(self.value_range[1],evidence_range[1])
+        #without evidence this variable's value_range represents limits for sampling
+        else:
+            lower_limit=self.value_range[0]
+            upper_limit=self.value_range[1]
+        #check if only one value is allowed and in case return immediatly
+        if lower_limit==upper_limit:
+            return lower_limit
+        #check for empty interval
+        if lower_limit>upper_limit:
+            raise Exception("Intersection of random variable's value_range and"
+                "allowed Interval for Evidence is empty - no sampling possible")
+        
+        proposal=self.cpd.sample_global(state,lower_limit,upper_limit)
         return proposal
         
     def get_probability(self, value, state):
diff --git a/primo/reasoning/Evidence.py b/primo/reasoning/Evidence.py
index 2946061..0974c9d 100644
--- a/primo/reasoning/Evidence.py
+++ b/primo/reasoning/Evidence.py
@@ -41,7 +41,7 @@ class EvidenceEqual(Evidence):
     def get_unique_value(self):
         return self.value
         
-class EvidenceIntervall(Evidence):
+class EvidenceInterval(Evidence):
     '''
     This class can be used to specify evidence that a variable has taken on
     some value in a defined interval.
@@ -54,27 +54,26 @@ class EvidenceIntervall(Evidence):
     def is_compatible(self, value):
         return self.min_val <= value and value<=self.max_val
         
+    def get_interval(self):
+        return self.min_val,self.max_val
+        
 
-class EvidenceLower(Evidence):
+class EvidenceLower(EvidenceInterval):
     '''
     This class can be used to specify evidence that a variable has taken on
     some value lower than some threshold.
     e.g. a<3
     '''
     def __init__(self,limit):
-        self.limit=limit
-        
-    def is_compatible(self, value):
-        return value<self.limit
+        super(EvidenceLower, self).__init__(float("-inf"),limit)
+
         
-class EvidenceHigher(Evidence):
+class EvidenceHigher(EvidenceInterval):
     '''
     This class can be used to specify evidence that a variable has taken on
     some value higher than some threshold.
     e.g. a>3
     '''
     def __init__(self,limit):
-        self.limit=limit
-        
-    def is_compatible(self, value):
-        return value>self.limit
+        super(EvidenceLower, self).__init__(limit,float("inf"))
+
diff --git a/primo/reasoning/MCMC.py b/primo/reasoning/MCMC.py
index a2b06a3..e2713a1 100644
--- a/primo/reasoning/MCMC.py
+++ b/primo/reasoning/MCMC.py
@@ -119,12 +119,13 @@ class MCMC(object):
         '''
         state={}
         for var in self.bn.get_nodes_in_topological_sort():
-            if var in evidence.keys():
-                value=evidence[var].get_unique_value()
-                if value == None:
-                    value=var.sample_global(state)
-                state[var]=value
-            else:
-                state[var]=var.sample_global(state)
+#            if var in evidence.keys():
+#                value=evidence[var].get_unique_value()
+#                if value == None:
+#                    value=var.sample_global(state,evidence)
+#                state[var]=value
+#            else:
+#                state[var]=var.sample_global(state)
+            state[var]=var.sample_global(state, evidence)
         return state
 
diff --git a/primo/reasoning/MarkovChainSampler.py b/primo/reasoning/MarkovChainSampler.py
index b2b792e..ced541f 100644
--- a/primo/reasoning/MarkovChainSampler.py
+++ b/primo/reasoning/MarkovChainSampler.py
@@ -101,12 +101,12 @@ class MetropolisHastingsTransitionModel(object):
         for node in nodes_to_resample:
             #propose a new value for this variable:
             current_value = state[node]
-            proposed_value = node.sample_local(current_value, extern_evidence)
+            proposed_value, cdf_ratio = node.sample_local(current_value, extern_evidence)
             
             p_of_proposal_given_mb = self._compute_p_of_value_given_mb(network, state, node, proposed_value)
             p_of_current_given_mb = self._compute_p_of_value_given_mb(network, state, node, current_value)
-
-            acceptance_probability = min(1.0,  p_of_proposal_given_mb/p_of_current_given_mb * 1.0/1.0)
+            #print "acceptance_probability = min(1.0,  "+str(p_of_proposal_given_mb)+" / "+str(p_of_current_given_mb) + " * "+str(cdf_ratio)
+            acceptance_probability = min(1.0,  p_of_proposal_given_mb/p_of_current_given_mb * cdf_ratio * 1.0/1.0)
             if random.random() <= acceptance_probability:
                 state[node]=proposed_value
             
diff --git a/primo/reasoning/__init__.py b/primo/reasoning/__init__.py
index 298e06c..26d0de9 100644
--- a/primo/reasoning/__init__.py
+++ b/primo/reasoning/__init__.py
@@ -3,7 +3,7 @@ from DiscreteNode import DiscreteNode
 from ContinuousNode import ContinuousNode
 from ContinuousNodeFactory import ContinuousNodeFactory
 
-from Evidence import EvidenceEqual, EvidenceIntervall, EvidenceLower, EvidenceHigher
+from Evidence import EvidenceEqual, EvidenceInterval, EvidenceLower, EvidenceHigher
 
 from MarkovChainSampler import MarkovChainSampler
 from MarkovChainSampler import GibbsTransitionModel
diff --git a/primo/reasoning/density/LinearBeta.py b/primo/reasoning/density/LinearBeta.py
index d1bc190..0ba8bec 100644
--- a/primo/reasoning/density/LinearBeta.py
+++ b/primo/reasoning/density/LinearBeta.py
@@ -73,9 +73,31 @@ class LinearBeta(Density):
         return self.output_scale*1.0/(1.0+math.exp(-x*self.input_scale))
 
 
-    def sample_global(self, state):
+    def sample_global(self, state, lower_limit, upper_limit):
         p=self._compute_p_given_parents(state)
         q=self._compute_q_given_parents(state)
-        value=random.betavariate(p,q)
+        #value=random.betavariate(p,q)
         #print "Sampled:"+str(value)
-        return value
+        #return value
+        
+        
+        
+        
+        
+        
+        #_lambda=self._compute_lambda_given_parents(state)
+        distribution=beta(p,q)
+        
+        lower_cdf=distribution.cdf(lower_limit)
+        upper_cdf=distribution.cdf(upper_limit)
+        
+        sample_in_integral=random.uniform(lower_cdf, upper_cdf)
+        
+        sample=distribution.ppf(sample_in_integral)
+        return sample
+        
+        
+        
+        
+        
+        
diff --git a/primo/reasoning/density/LinearExponential.py b/primo/reasoning/density/LinearExponential.py
index 54c2e18..a9c0155 100644
--- a/primo/reasoning/density/LinearExponential.py
+++ b/primo/reasoning/density/LinearExponential.py
@@ -37,7 +37,6 @@ class LinearExponential(Density):
         
         #Compute the offset for the density and displace the value accordingly
         _lambda = self._compute_lambda_given_parents(dict(node_value_pairs))
-        #print "lambda:"+str(_lambda)
         #Evaluate the displaced density at value
         return _lambda*math.exp(-_lambda*value)
 
@@ -49,8 +48,17 @@ class LinearExponential(Density):
         _lambda=self.output_scale*1.0/(1.0+math.exp(-x*self.input_scale))
         return _lambda
 
-    def sample_global(self,state):
+    def sample_global(self,state, lower_limit, upper_limit):
         _lambda=self._compute_lambda_given_parents(state)
-        sample=random.expovariate(_lambda)
+        distribution=scipy.stats.expon(loc=0,scale=1.0/_lambda)
+        
+        lower_cdf=distribution.cdf(lower_limit)
+        upper_cdf=distribution.cdf(upper_limit)
+        
+        sample_in_integral=random.uniform(lower_cdf, upper_cdf)
+        
+        sample=distribution.ppf(sample_in_integral)
+        
+        #sample=random.expovariate(_lambda)
         #print "EXPO-SAMPLE: "+str(sample)+" at lambda: "+str(_lambda)
         return sample
diff --git a/primo/reasoning/density/LinearGauss.py b/primo/reasoning/density/LinearGauss.py
index f1ec5fb..9b9d676 100644
--- a/primo/reasoning/density/LinearGauss.py
+++ b/primo/reasoning/density/LinearGauss.py
@@ -65,6 +65,17 @@ class LinearGauss(Density):
                 x = x + self.b[node]*state[node]
         return x
         
-    def sample_global(self,state):
-        return random.normalvariate(self._compute_offset_given_parents(state),self.var**0.5)
+    def sample_global(self,state,lower_limit,upper_limit):
+        
+        distribution=scipy.stats.norm(self._compute_offset_given_parents(state), self.var**0.5)
+        
+        lower_cdf=distribution.cdf(lower_limit)
+        upper_cdf=distribution.cdf(upper_limit)
+        
+        sample_in_integral=random.uniform(lower_cdf, upper_cdf)
+        
+        sample=distribution.ppf(sample_in_integral)
+    
+    
+        return sample
             
-- 
GitLab