Skip to main content

Construction & inference in R


source("bayesserver.R")

# In this example we programatically create a simple Bayesian network.
# Note that you can automatically define nodes from data using
# classes in BayesServer.Data.Discovery,
# and you can automatically learn the parameters using classes in
# BayesServer.Learning.Parameters,
# however here we build a Bayesian network from scratch.


# If you are using a licensed version of Bayes Server, uncomment the following line and enter your license key
# licenseBayesServer("license-key-goes-here")

network <- new(Network, "Demo")

# add the nodes (variables)

aTrue <- new(State, "True")
aFalse <- new(State, "False")
a = new(Node, "A", toStateArray(aTrue, aFalse))

bTrue <- new(State, "True")
bFalse <- new(State, "False")
b <- new(Node, "B", toStateArray(bTrue, bFalse))

cTrue <- new(State, "True")
cFalse <- new(State, "False")
c <- new(Node, "C", toStateArray(cTrue, cFalse))

dTrue <- new(State, "True")
dFalse <- new(State, "False")
d = new(Node, "D", toStateArray(dTrue, dFalse))

nodes <- network$getNodes()
links <- network$getLinks()

nodes$add(a)
nodes$add(b)
nodes$add(c)
nodes$add(d)

# add some directed links

links$add(new(Link, a, b))
links$add(new(Link, a, c))
links$add(new(Link, b, d))
links$add(new(Link, c, d))

# at this point we have fully specified the structural (graphical) specification of the Bayesian Network.

# We must define the necessary probability distributions for each node.

# Each node in a Bayesian Network requires a probability distribution conditioned on it's parents.

# newDistribution() can be called on a Node to create the appropriate probability distribution for a node
# or it can be created manually.

# The interface Distribution has been designed to represent both discrete and continuous variables,

# As we are currently dealing with discrete distributions, we will use the
# Table class.

# To access the discrete part of a distribution, we use Distribution.Table.

# The Table class is used to define distributions over a number of discrete variables.

tableA <- a$newDistribution()$getTable() # access the table property of the Distribution

# IMPORTANT
# Note that calling Node.newDistribution() does NOT assign the distribution to the node.
# A distribution cannot be assigned to a node until it is correctly specified.
# If a distribution becomes invalid (e.g. a parent node is added), it is automatically set to null.

# as node A has no parents there is no ambiguity about the order of variables in the distribution
tableA$set(0.1, toStateArray(aTrue))
tableA$set(0.9, toStateArray(aFalse))

# now tableA is correctly specified we can assign it to Node A;
a$setDistribution(tableA)


# node B has node A as a parent, therefore its distribution will be P(B|A)

tableB <- b$newDistribution()$getTable()
tableB$set(0.2, toStateArray(aTrue, bTrue))
tableB$set(0.8, toStateArray(aTrue, bFalse))
tableB$set(0.15, toStateArray(aFalse, bTrue))
tableB$set(0.85, toStateArray(aFalse, bFalse))
b$setDistribution(tableB)


# specify P(C|A)
tableC <- c$newDistribution()$getTable()
tableC$set(0.3, toStateArray(aTrue, cTrue))
tableC$set(0.7, toStateArray(aTrue, cFalse))
tableC$set(0.4, toStateArray(aFalse, cTrue))
tableC$set(0.6, toStateArray(aFalse, cFalse))
c$setDistribution(tableC)


# specify P(D|B,C)
tableD <- d$newDistribution()$getTable()

# we could specify the values individually as above, or we can use a TableIterator as follows
iteratorD <- new(TableIterator, tableD, toNodeArray(b, c, d))
iteratorD$copyFrom(c(0.4, 0.6, 0.55, 0.45, 0.32, 0.68, 0.01, 0.99))
d$setDistribution(tableD)


# The network is now fully specified

# If required the network can be saved...

if (FALSE) # change this to true to save the network
{
network$save("fileName.bayes") # replace 'fileName.bayes' with your own path
}

# Now we will calculate P(A|D=True), i.e. the probability of A given the evidence that D is true

# use the factory design pattern to create the necessary inference related objects
factory <- new(RelevanceTreeInferenceFactory)
inference <- factory$createInferenceEngine(network)
queryOptions <- factory$createQueryOptions()
queryOutput <- factory$createQueryOutput()

# we could have created these objects explicitly instead, but as the number of algorithms grows
# this makes it easier to switch between them

evidence <- inference$getEvidence()
evidence$setState(dTrue) # set D = True

queryA <- new(Table, a)
inference$getQueryDistributions()$add(queryA)
inference$query(queryOptions, queryOutput) # note that this can raise an exception (see help for details)

print(sprintf("P(A|D=True) = {%f,%f}", queryA$get(toStateArray(aTrue)), queryA$get(toStateArray(aFalse))))

# Expected output ...
# P(A|D=True) = {0.0980748663101604,0.90192513368984}

# to perform another query we reuse all the objects

# now lets calculate P(A|D=True, C=True)
evidence$setState(cTrue)

# we will also return the log-likelihood of the case
queryOptions$setLogLikelihood(TRUE) # only request the log-likelihood if you really need it, as extra computation is involved

inference$query(queryOptions, queryOutput)
print(sprintf("P(A|D=True, C=True) = {%f,%f}, log-likelihood = %f.", queryA$get(toStateArray(aTrue)), queryA$get(toStateArray(aFalse)), queryOutput$getLogLikelihood()))

# Expected output ...
# P(A|D=True, C=True) = {0.0777777777777778,0.922222222222222}, log-likelihood = -2.04330249506396.


# Note that we can also calculate joint queries such as P(A,B|D=True,C=True)