# Construction & inference in R

source("bayesserver.R")

# In this example we programatically create a simple Bayesian network.

# Note that you can automatically define nodes from data using

# classes in BayesServer.Data.Discovery,

# and you can automatically learn the parameters using classes in

# BayesServer.Learning.Parameters,

# however here we build a Bayesian network from scratch.

# If you are using a licensed version of Bayes Server, uncomment the following line and enter your license key

# licenseBayesServer("license-key-goes-here")

network <- new(Network, "Demo")

# add the nodes (variables)

aTrue <- new(State, "True")

aFalse <- new(State, "False")

a = new(Node, "A", toStateArray(aTrue, aFalse))

bTrue <- new(State, "True")

bFalse <- new(State, "False")

b <- new(Node, "B", toStateArray(bTrue, bFalse))

cTrue <- new(State, "True")

cFalse <- new(State, "False")

c <- new(Node, "C", toStateArray(cTrue, cFalse))

dTrue <- new(State, "True")

dFalse <- new(State, "False")

d = new(Node, "D", toStateArray(dTrue, dFalse))

nodes <- network$getNodes()

links <- network$getLinks()

nodes$add(a)

nodes$add(b)

nodes$add(c)

nodes$add(d)

# add some directed links

links$add(new(Link, a, b))

links$add(new(Link, a, c))

links$add(new(Link, b, d))

links$add(new(Link, c, d))

# at this point we have fully specified the structural (graphical) specification of the Bayesian Network.

# We must define the necessary probability distributions for each node.

# Each node in a Bayesian Network requires a probability distribution conditioned on it's parents.

# newDistribution() can be called on a Node to create the appropriate probability distribution for a node

# or it can be created manually.

# The interface Distribution has been designed to represent both discrete and continuous variables,

# As we are currently dealing with discrete distributions, we will use the

# Table class.

# To access the discrete part of a distribution, we use Distribution.Table.

# The Table class is used to define distributions over a number of discrete variables.

tableA <- a$newDistribution()$getTable() # access the table property of the Distribution

# IMPORTANT

# Note that calling Node.newDistribution() does NOT assign the distribution to the node.

# A distribution cannot be assigned to a node until it is correctly specified.

# If a distribution becomes invalid (e.g. a parent node is added), it is automatically set to null.

# as node A has no parents there is no ambiguity about the order of variables in the distribution

tableA$set(0.1, toStateArray(aTrue))

tableA$set(0.9, toStateArray(aFalse))

# now tableA is correctly specified we can assign it to Node A;

a$setDistribution(tableA)

# node B has node A as a parent, therefore its distribution will be P(B|A)

tableB <- b$newDistribution()$getTable()

tableB$set(0.2, toStateArray(aTrue, bTrue))

tableB$set(0.8, toStateArray(aTrue, bFalse))

tableB$set(0.15, toStateArray(aFalse, bTrue))

tableB$set(0.85, toStateArray(aFalse, bFalse))

b$setDistribution(tableB)

# specify P(C|A)

tableC <- c$newDistribution()$getTable()

tableC$set(0.3, toStateArray(aTrue, cTrue))

tableC$set(0.7, toStateArray(aTrue, cFalse))

tableC$set(0.4, toStateArray(aFalse, cTrue))

tableC$set(0.6, toStateArray(aFalse, cFalse))

c$setDistribution(tableC)

# specify P(D|B,C)

tableD <- d$newDistribution()$getTable()

# we could specify the values individually as above, or we can use a TableIterator as follows

iteratorD <- new(TableIterator, tableD, toNodeArray(b, c, d))

iteratorD$copyFrom(c(0.4, 0.6, 0.55, 0.45, 0.32, 0.68, 0.01, 0.99))

d$setDistribution(tableD)

# The network is now fully specified

# If required the network can be saved...

if (FALSE) # change this to true to save the network

{

network$save("fileName.bayes") # replace 'fileName.bayes' with your own path

}

# Now we will calculate P(A|D=True), i.e. the probability of A given the evidence that D is true

# use the factory design pattern to create the necessary inference related objects

factory <- new(RelevanceTreeInferenceFactory)

inference <- factory$createInferenceEngine(network)

queryOptions <- factory$createQueryOptions()

queryOutput <- factory$createQueryOutput()

# we could have created these objects explicitly instead, but as the number of algorithms grows

# this makes it easier to switch between them

evidence <- inference$getEvidence()

evidence$setState(dTrue) # set D = True

queryA <- new(Table, a)

inference$getQueryDistributions()$add(queryA)

inference$query(queryOptions, queryOutput) # note that this can raise an exception (see help for details)

print(sprintf("P(A|D=True) = {%f,%f}", queryA$get(toStateArray(aTrue)), queryA$get(toStateArray(aFalse))))

# Expected output ...

# P(A|D=True) = {0.0980748663101604,0.90192513368984}

# to perform another query we reuse all the objects

# now lets calculate P(A|D=True, C=True)

evidence$setState(cTrue)

# we will also return the log-likelihood of the case

queryOptions$setLogLikelihood(TRUE) # only request the log-likelihood if you really need it, as extra computation is involved

inference$query(queryOptions, queryOutput)

print(sprintf("P(A|D=True, C=True) = {%f,%f}, log-likelihood = %f.", queryA$get(toStateArray(aTrue)), queryA$get(toStateArray(aFalse)), queryOutput$getLogLikelihood()))

# Expected output ...

# P(A|D=True, C=True) = {0.0777777777777778,0.922222222222222}, log-likelihood = -2.04330249506396.

# Note that we can also calculate joint queries such as P(A,B|D=True,C=True)