Construction & inference in R

source("bayesserver.R")

# In this example we programatically create a simple Bayesian network.
# Note that you can automatically define nodes from data using
# classes in BayesServer.Data.Discovery,
# and you can automatically learn the parameters using classes in
# BayesServer.Learning.Parameters,
# however here we build a Bayesian network from scratch.

# If you are using a licensed version of Bayes Server, uncomment the following line and enter your license key

network <- new(Network, "Demo")

# add the nodes (variables)

aTrue <- new(State, "True")
aFalse <- new(State, "False")
a = new(Node, "A", toStateArray(aTrue, aFalse))

bTrue <- new(State, "True")
bFalse <- new(State, "False")
b <- new(Node, "B", toStateArray(bTrue, bFalse))

cTrue <- new(State, "True")
cFalse <- new(State, "False")
c <- new(Node, "C", toStateArray(cTrue, cFalse))

dTrue <- new(State, "True")
dFalse <- new(State, "False")
d = new(Node, "D", toStateArray(dTrue, dFalse))

nodes <- network\$getNodes()

# at this point we have fully specified the structural (graphical) specification of the Bayesian Network.

# We must define the necessary probability distributions for each node.

# Each node in a Bayesian Network requires a probability distribution conditioned on it's parents.

# newDistribution() can be called on a Node to create the appropriate probability distribution for a node
# or it can be created manually.

# The interface Distribution has been designed to represent both discrete and continuous variables,

# As we are currently dealing with discrete distributions, we will use the
# Table class.

# To access the discrete part of a distribution, we use Distribution.Table.

# The Table class is used to define distributions over a number of discrete variables.

tableA <- a\$newDistribution()\$getTable()     # access the table property of the Distribution

# IMPORTANT
# Note that calling Node.newDistribution() does NOT assign the distribution to the node.
# A distribution cannot be assigned to a node until it is correctly specified.
# If a distribution becomes invalid  (e.g. a parent node is added), it is automatically set to null.

# as node A has no parents there is no ambiguity about the order of variables in the distribution
tableA\$set(0.1, toStateArray(aTrue))
tableA\$set(0.9, toStateArray(aFalse))

# now tableA is correctly specified we can assign it to Node A;
a\$setDistribution(tableA)

# node B has node A as a parent, therefore its distribution will be P(B|A)

tableB <- b\$newDistribution()\$getTable()
tableB\$set(0.2, toStateArray(aTrue, bTrue))
tableB\$set(0.8, toStateArray(aTrue, bFalse))
tableB\$set(0.15, toStateArray(aFalse, bTrue))
tableB\$set(0.85, toStateArray(aFalse, bFalse))
b\$setDistribution(tableB)

# specify P(C|A)
tableC <- c\$newDistribution()\$getTable()
tableC\$set(0.3, toStateArray(aTrue, cTrue))
tableC\$set(0.7, toStateArray(aTrue, cFalse))
tableC\$set(0.4, toStateArray(aFalse, cTrue))
tableC\$set(0.6, toStateArray(aFalse, cFalse))
c\$setDistribution(tableC)

# specify P(D|B,C)
tableD <- d\$newDistribution()\$getTable()

# we could specify the values individually as above, or we can use a TableIterator as follows
iteratorD <- new(TableIterator, tableD, toNodeArray(b, c, d))
iteratorD\$copyFrom(c(0.4, 0.6, 0.55, 0.45, 0.32, 0.68, 0.01, 0.99))
d\$setDistribution(tableD)

# The network is now fully specified

# If required the network can be saved...

if (FALSE)   # change this to true to save the network
{
network.save("fileName.bayes")  # replace 'fileName.bayes' with your own path
}

# Now we will calculate P(A|D=True), i.e. the probability of A given the evidence that D is true

# use the factory design pattern to create the necessary inference related objects
factory <- new(RelevanceTreeInferenceFactory)
inference <- factory\$createInferenceEngine(network)
queryOptions <- factory\$createQueryOptions()
queryOutput <- factory\$createQueryOutput()

# we could have created these objects explicitly instead, but as the number of algorithms grows
# this makes it easier to switch between them

evidence <- inference\$getEvidence()
evidence\$setState(dTrue)  # set D = True

queryA <- new(Table, a)
inference\$query(queryOptions, queryOutput) # note that this can raise an exception (see help for details)

print(sprintf("P(A|D=True) = {%f,%f}", queryA\$get(toStateArray(aTrue)), queryA\$get(toStateArray(aFalse))))

# Expected output ...
# P(A|D=True) = {0.0980748663101604,0.90192513368984}

# to perform another query we reuse all the objects

# now lets calculate P(A|D=True, C=True)
evidence\$setState(cTrue)

# we will also return the log-likelihood of the case
queryOptions\$setLogLikelihood(TRUE) # only request the log-likelihood if you really need it, as extra computation is involved

inference\$query(queryOptions, queryOutput)
print(sprintf("P(A|D=True, C=True) = {%f,%f}, log-likelihood = %f.", queryA\$get(toStateArray(aTrue)), queryA\$get(toStateArray(aFalse)), queryOutput\$getLogLikelihood()))

# Expected output ...
# P(A|D=True, C=True) = {0.0777777777777778,0.922222222222222}, log-likelihood = -2.04330249506396.

# Note that we can also calculate joint queries such as P(A,B|D=True,C=True)