Construction & inference (Time series) in R

source("bayesserver.R")


# In this example we programatically create a Dynamic Bayesian network (time series).
# Note that you can automatically define nodes from data using
# classes in BayesServer.Data.Discovery,
# and you can automatically learn the parameters using classes in
# BayesServer.Learning.Parameters,
# however here we build a Bayesian network from scratch.

network <- new(Network, "DBN")

cluster1 <- new(State, "Cluster1")
cluster2 <- new(State, "Cluster2")
cluster3 <- new(State, "Cluster3")
varTransition <- new(Variable, "Transition", toStateArray(cluster1, cluster2, cluster3))
nodeTransition <- new(Node, varTransition)

# make the node temporal, so that it appears in each time slice
nodeTransition$setTemporalType(TemporalType$TEMPORAL)

varObs1 <- new(Variable, "Obs1", VariableValueType$CONTINUOUS)
varObs2 <- new(Variable, "Obs2", VariableValueType$CONTINUOUS)
varObs3 <- new(Variable, "Obs3", VariableValueType$CONTINUOUS)
varObs4 <- new(Variable, "Obs4", VariableValueType$CONTINUOUS)

# observation node is a multi variable node, consisting of 4 continuous variables
nodeObservation <- new(Node, "Observation", toVariableArray(varObs1, varObs2, varObs3, varObs4))
nodeObservation$setTemporalType(TemporalType$TEMPORAL)

network$getNodes()$add(nodeTransition)
network$getNodes()$add(nodeObservation)

# link the transition node to the observation node within each time slice
network$getLinks()$add(new(Link, nodeTransition, nodeObservation))

# add a temporal link of order 1.  This links the transition node to itself in the next time slice
network$getLinks()$add(new(Link, nodeTransition, nodeTransition, 1L))

# at this point the structural specification is complete

# now complete the distributions

# because the transition node has an incoming temporal link of order 1 (from itself), we must specify
# two distributions, the first of which is specified for time = 0

cluster1Time0 <- new(StateContext, cluster1, new(Integer, "0"))
cluster2Time0 <- new(StateContext, cluster2, new(Integer, "0"))
cluster3Time0 <- new(StateContext, cluster3, new(Integer, "0"))

prior <- nodeTransition$newDistribution(0L)$getTable()
prior$set(0.2, toStateContextArray(cluster1Time0))
prior$set(0.3, toStateContextArray(cluster2Time0))
prior$set(0.5, toStateContextArray(cluster3Time0))

# NewDistribution does not assign the new distribution, so it still must be assigned
nodeTransition$setDistribution(prior)

# the second is specified for time >= 1
transition <- nodeTransition$newDistribution(1L)$getTable()

# when specifying temporal distributions, variables which belong to temporal nodes must have times associated
# NOTE: Each time is specified relative to the current point in time which is defined as zero,
# therefore the time for variables at the previous time step is -1

cluster1TimeM1 <- new(StateContext, cluster1, new(Integer, "-1"))
cluster2TimeM1 <- new(StateContext, cluster2, new(Integer, "-1"))
cluster3TimeM1 <- new(StateContext, cluster3, new(Integer, "-1"))

transition$set(0.2, toStateContextArray(cluster1TimeM1, cluster1Time0))
transition$set(0.3, toStateContextArray(cluster1TimeM1, cluster2Time0))
transition$set(0.5, toStateContextArray(cluster1TimeM1, cluster3Time0))
transition$set(0.4, toStateContextArray(cluster2TimeM1, cluster1Time0))
transition$set(0.4, toStateContextArray(cluster2TimeM1, cluster2Time0))
transition$set(0.2, toStateContextArray(cluster2TimeM1, cluster3Time0))
transition$set(0.9, toStateContextArray(cluster3TimeM1, cluster1Time0))
transition$set(0.09, toStateContextArray(cluster3TimeM1, cluster2Time0))
transition$set(0.01, toStateContextArray(cluster3TimeM1, cluster3Time0))

# an alternative would be to set values using TableIterator.CopyFrom

nodeTransition$getDistributions()$set(1L, transition)

# Node observation does not have any incoming temporal links, so
# only requires a distribution specified at time >=0
# Calling NewDistribution without specifying a time assumes time zero.
gaussian <- nodeObservation$newDistribution()

# set the Gaussian parameters corresponding to the state "Cluster1" of variable "transition"

varObs1Time0 = new(VariableContext, varObs1, new(Integer, "0"), HeadTail$HEAD)
varObs2Time0 = new(VariableContext, varObs2, new(Integer, "0"), HeadTail$HEAD)
varObs3Time0 = new(VariableContext, varObs3, new(Integer, "0"), HeadTail$HEAD)
varObs4Time0 = new(VariableContext, varObs4, new(Integer, "0"), HeadTail$HEAD)

gaussian$setMean(varObs1Time0, 3.2, toStateContextArray(cluster1Time0))
gaussian$setMean(varObs2Time0, 2.4, toStateContextArray(cluster1Time0))
gaussian$setMean(varObs3Time0, -1.7, toStateContextArray(cluster1Time0))
gaussian$setMean(varObs4Time0, 6.2, toStateContextArray(cluster1Time0))

gaussian$setVariance(varObs1Time0, 2.3, toStateContextArray(cluster1Time0))
gaussian$setVariance(varObs2Time0, 2.1, toStateContextArray(cluster1Time0))
gaussian$setVariance(varObs3Time0, 3.2, toStateContextArray(cluster1Time0))
gaussian$setVariance(varObs4Time0, 1.4, toStateContextArray(cluster1Time0))

gaussian$setCovariance(varObs1Time0, varObs2Time0, -0.3, toStateContextArray(cluster1Time0))
gaussian$setCovariance(varObs1Time0, varObs3Time0, 0.5, toStateContextArray(cluster1Time0))
gaussian$setCovariance(varObs1Time0, varObs4Time0, 0.35, toStateContextArray(cluster1Time0))
gaussian$setCovariance(varObs2Time0, varObs3Time0, 0.12, toStateContextArray(cluster1Time0))
gaussian$setCovariance(varObs2Time0, varObs4Time0, 0.1, toStateContextArray(cluster1Time0))
gaussian$setCovariance(varObs3Time0, varObs4Time0, 0.23, toStateContextArray(cluster1Time0))

# set the Gaussian parameters corresponding to the state "Cluster2" of variable "transition"
gaussian$setMean(varObs1Time0, 3.0, toStateContextArray(cluster2Time0))
gaussian$setMean(varObs2Time0, 2.8, toStateContextArray(cluster2Time0))
gaussian$setMean(varObs3Time0, -2.5, toStateContextArray(cluster2Time0))
gaussian$setMean(varObs4Time0, 6.9, toStateContextArray(cluster2Time0))

gaussian$setVariance(varObs1Time0, 2.1, toStateContextArray(cluster2Time0))
gaussian$setVariance(varObs2Time0, 2.2, toStateContextArray(cluster2Time0))
gaussian$setVariance(varObs3Time0, 3.3, toStateContextArray(cluster2Time0))
gaussian$setVariance(varObs4Time0, 1.5, toStateContextArray(cluster2Time0))

gaussian$setCovariance(varObs1Time0, varObs2Time0, -0.4, toStateContextArray(cluster2Time0))
gaussian$setCovariance(varObs1Time0, varObs3Time0, 0.5, toStateContextArray(cluster2Time0))
gaussian$setCovariance(varObs1Time0, varObs4Time0, 0.45, toStateContextArray(cluster2Time0))
gaussian$setCovariance(varObs2Time0, varObs3Time0, 0.22, toStateContextArray(cluster2Time0))
gaussian$setCovariance(varObs2Time0, varObs4Time0, 0.15, toStateContextArray(cluster2Time0))
gaussian$setCovariance(varObs3Time0, varObs4Time0, 0.24, toStateContextArray(cluster2Time0))

# set the Gaussian parameters corresponding to the state "Cluster3" of variable "transition"

gaussian$setMean(varObs1Time0, 3.8, toStateContextArray(cluster3Time0))
gaussian$setMean(varObs2Time0, 2.0, toStateContextArray(cluster3Time0))
gaussian$setMean(varObs3Time0, -1.9, toStateContextArray(cluster3Time0))
gaussian$setMean(varObs4Time0, 6.25, toStateContextArray(cluster3Time0))

gaussian$setVariance(varObs1Time0, 2.34, toStateContextArray(cluster3Time0))
gaussian$setVariance(varObs2Time0, 2.11, toStateContextArray(cluster3Time0))
gaussian$setVariance(varObs3Time0, 3.22, toStateContextArray(cluster3Time0))
gaussian$setVariance(varObs4Time0, 1.43, toStateContextArray(cluster3Time0))

gaussian$setCovariance(varObs1Time0, varObs2Time0, -0.31, toStateContextArray(cluster3Time0))
gaussian$setCovariance(varObs1Time0, varObs3Time0, 0.52, toStateContextArray(cluster3Time0))
gaussian$setCovariance(varObs1Time0, varObs4Time0, 0.353, toStateContextArray(cluster3Time0))
gaussian$setCovariance(varObs2Time0, varObs3Time0, 0.124, toStateContextArray(cluster3Time0))
gaussian$setCovariance(varObs2Time0, varObs4Time0, 0.15, toStateContextArray(cluster3Time0))
gaussian$setCovariance(varObs3Time0, varObs4Time0, 0.236, toStateContextArray(cluster3Time0))

nodeObservation$setDistribution(gaussian)

# optional check to validate network
network$validate(new(ValidationOptions))


# at this point the network has been fully specified

# we will now perform some queries on the network

factory <- new(RelevanceTreeInferenceFactory)
inference <- factory$createInferenceEngine(network)
queryOptions <- factory$createQueryOptions()
queryOutput <- factory$createQueryOutput()

# set some temporal evidence

inference$getEvidence()$set(varObs1, toDoubleArray(2.2, 2.4, 2.6, 2.9), 0L, 0L, 4L)
inference$getEvidence()$set(varObs2, toDoubleArray(NA, 4.0, 4.1, 4.88), 0L, 0L, 4L)
inference$getEvidence()$set(varObs3, toDoubleArray(-2.5, -2.3, NA, -4.0), 0L, 0L, 4L)
inference$getEvidence()$set(varObs4, toDoubleArray(4.0, 6.5, 4.9, 4.4), 0L, 0L, 4L)

queryOptions$setLogLikelihood(TRUE) # only ask for this if you really need it

# predict the observation variables one time step in the future
predictTime = new(Integer, 4L)

gaussianFuture <- lapply(nodeObservation$getVariables(), function(v){
  
  g <- new(CLGaussian, v, predictTime)
  inference$getQueryDistributions()$add(g)
  return(g)
  
})

# we will also demonstrate querying a joint distribution

jointFuture <- new(CLGaussian, toList(varObs1, varObs2), predictTime)
inference$getQueryDistributions()$add(jointFuture)


inference$query(queryOptions, queryOutput) # note that this can raise an exception (see help for details)

print(sprintf("LogLikelihood: %f", queryOutput$getLogLikelihood()))

for (h in 1:(length(gaussianFuture))) {
  variableH <- nodeObservation$getVariables()$get(h-1L)
  mean <- gaussianFuture[[h]]$getMean(variableH, predictTime)
  print(sprintf("P(%s(t=4)|evidence)=%s", variableH$getName(), mean))
}

print(sprintf("P(%s,%s|evidence)=", varObs1$getName(), varObs2$getName()))
print(sprintf("%f  %f", jointFuture$getMean(varObs1, predictTime), jointFuture$getMean(varObs2, predictTime)))
print(sprintf("%f  %f", jointFuture$getVariance(varObs1, predictTime), jointFuture$getCovariance(varObs1, predictTime, varObs2, predictTime)))
print(sprintf("%f  %f", jointFuture$getCovariance(varObs2, predictTime, varObs1, predictTime), jointFuture$getVariance(varObs2, predictTime)))

# Expected output...

# LogLikelihood: -26.3688322999762

# P(Obs1(t=4)|evidence)=3.33914912825023
# P(Obs2(t=4)|evidence)=2.38039739886759
# P(Obs3(t=4)|evidence)=-1.98416436694525
# P(Obs4(t=4)|evidence)=6.40822262492584

# P(Obs1,Obs2|evidence)=
# 3.33914912825023        2.38039739886759
# 2.36608725717058        -0.427500059391733
# -0.427500059391733      2.22592296205311