Construction & inference in Python

# __author__ = 'Bayes Server'
# __version__= '0.5'

import jpype # pip install jpype1 (version 1.2.1 or later)
import jpype.imports
from jpype.types import *

classpath = "lib/bayesserver-10.8.jar" # TODO download the Bayes Server Java API, and adjust the path

# Launch the JVM
jpype.startJVM(classpath=[classpath])
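
# Optional sanity check (not part of the original sample): confirm the JVM is
# running before importing Java packages.
assert jpype.isJVMStarted()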

# import the Java modules

from com.bayesserver import *
from com.bayesserver.inference import *

network = Network('Demo')

# add the nodes/variables

aTrue = State('True')
aFalse = State('False')
a = Node('A', [aTrue, aFalse])

bTrue = State('True')
bFalse = State('False')
b = Node('B', [bTrue, bFalse])

cTrue = State('True')
cFalse = State('False')
c = Node('C', [cTrue, cFalse])

dTrue = State('True')
dFalse = State('False')
d = Node('D', [dTrue, dFalse])

nodes = network.getNodes()
nodes.add(a)
nodes.add(b)
nodes.add(c)
nodes.add(d)

# add some directed links
links = network.getLinks()
links.add(Link(a, b))
links.add(Link(a, c))
links.add(Link(b, d))
links.add(Link(c, d))

# at this point we have fully specified the structure (graph) of the Bayesian network.

# We must define the necessary probability distributions for each node.

# Each node in a Bayesian Network requires a probability distribution conditioned on its parents.

# newDistribution() can be called on a Node to create the appropriate probability distribution for a node
# or it can be created manually.

# The interface Distribution has been designed to represent both discrete and continuous variables.

# As we are currently dealing with discrete distributions, we will use the
# Table class.

# To access the discrete part of a distribution, we use Distribution.getTable().

# The Table class is used to define distributions over a number of discrete variables.

tableA = a.newDistribution().getTable() # access the table property of the Distribution

# IMPORTANT
# Note that calling Node.newDistribution() does NOT assign the distribution to the node.
# A distribution cannot be assigned to a node until it is correctly specified.
# If a distribution becomes invalid (e.g. a parent node is added), it is automatically set to null.

# as node A has no parents, there is no ambiguity about the order of variables in the distribution
tableA.set(0.1, [aTrue])
tableA.set(0.9, [aFalse])

# now that tableA is correctly specified, we can assign it to node A
a.setDistribution(tableA)
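
# Optional check (not in the original): read the values back with Table.get,
# which is also used for query results further below, and confirm they sum to 1.
assert abs(tableA.get([aTrue]) + tableA.get([aFalse]) - 1.0) < 1e-9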

# node B has node A as a parent, therefore its distribution will be P(B|A)

tableB = b.newDistribution().getTable()
tableB.set(0.2, [aTrue, bTrue])
tableB.set(0.8, [aTrue, bFalse])
tableB.set(0.15, [aFalse, bTrue])
tableB.set(0.85, [aFalse, bFalse])
b.setDistribution(tableB)

# specify P(C|A)
tableC = c.newDistribution().getTable()
tableC.set(0.3, [aTrue, cTrue])
tableC.set(0.7, [aTrue, cFalse])
tableC.set(0.4, [aFalse, cTrue])
tableC.set(0.6, [aFalse, cFalse])
c.setDistribution(tableC)

# specify P(D|B,C)
tableD = d.newDistribution().getTable()

# we could specify the values individually as above, or we can use a TableIterator as follows
iteratorD = TableIterator(tableD, [b, c, d])
iteratorD.copyFrom([0.4, 0.6, 0.55, 0.45, 0.32, 0.68, 0.01, 0.99])
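
# With the variable order [b, c, d] given to the iterator, the values above are
# laid out with the last variable (d) cycling fastest, so the first two entries
# are P(D=True|B=True,C=True)=0.4 and P(D=False|B=True,C=True)=0.6, and so on.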
d.setDistribution(tableD)

# The network is now fully specified

# If required the network can be saved...

if False:  # change this to True to save the network
    network.save('fileName.bayes')  # replace 'fileName.bayes' with your own path
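
# A previously saved network can be loaded again in a similar way (a minimal
# sketch, assuming Network.load takes a file path):
if False:  # change this to True to load the network
    loaded = Network()
    loaded.load('fileName.bayes')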

# Now we will calculate P(A|D=True), i.e. the probability of A given the evidence that D is true

# use the factory design pattern to create the necessary inference-related objects
factory = RelevanceTreeInferenceFactory()
inference = factory.createInferenceEngine(network)
queryOptions = factory.createQueryOptions()
queryOutput = factory.createQueryOutput()

# we could have created these objects explicitly instead, but as the number of algorithms grows
# this makes it easier to switch between them
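# (other factories, e.g. LikelihoodSamplerInferenceFactory for approximate
# sampling-based inference, can be swapped in here; check the docs for the
# algorithms available in your version)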

inference.getEvidence().setState(dTrue) # set D = True

queryA = Table(a)
inference.getQueryDistributions().add(QueryDistribution(queryA))
inference.query(queryOptions, queryOutput) # note that this can raise an exception (see help for details)

print(f'P(A|D=True) = [{queryA.get([aTrue])},{queryA.get([aFalse])}]')

# Expected output ...
# P(A|D=True) = [0.0980748663101604,0.90192513368984]
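
# If the evidence had zero probability under the model, query() would raise an
# InconsistentEvidenceException (from com.bayesserver.inference). A minimal
# sketch of guarding a query (here the evidence is consistent, so this simply
# re-runs the same query):
try:
    inference.query(queryOptions, queryOutput)
except InconsistentEvidenceException as ex:
    print(f'Inconsistent evidence: {ex.getMessage()}')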

# to perform another query we reuse all the objects

# now let's calculate P(A|D=True, C=True); the evidence D=True set earlier remains in place, so both pieces of evidence apply
inference.getEvidence().setState(cTrue)

# we will also return the log-likelihood of the case
queryOptions.setLogLikelihood(True)  # only request the log-likelihood if you really need it, as extra computation is involved

inference.query(queryOptions, queryOutput)
print(f'P(A|D=True, C=True) = [{queryA.get([aTrue])},{queryA.get([aFalse])}], log-likelihood = {queryOutput.getLogLikelihood()}.')

# Expected output ...
# P(A|D=True, C=True) = [0.0777777777777778,0.922222222222222], log-likelihood = -2.04330249506396.


# Note that we can also calculate joint queries such as P(A,B|D=True,C=True)
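
# A minimal sketch of such a joint query. This assumes a Table constructor
# accepting a list of variables (check the docs for the exact overloads);
# the evidence D=True, C=True set above is still in place.
queryAB = Table([a.getVariables().get(0), b.getVariables().get(0)])
inference.getQueryDistributions().add(QueryDistribution(queryAB))
inference.query(queryOptions, queryOutput)  # queryA is recomputed as well
print(f'P(A=True,B=True|D=True,C=True) = {queryAB.get([aTrue, bTrue])}')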