Construction & inference (Time series) in Python

# __author__ = 'Bayes Server'
# __version__= '0.1'

from jpype import *  # pip install jpype1==0.7.5

print(getDefaultJVMPath())

classpath = "C:\\Program Files\\Bayes Server\\Bayes Server 9.2\\API\\Java\\bayesserver-9.2.jar"

startJVM(getDefaultJVMPath(), "-Djava.class.path=%s" % classpath, convertStrings=False)

bayes = JPackage("com.bayesserver")
bayes_inference = bayes.inference

# Uncomment the following 2 lines and change the license key, if you are using a licensed version
# License = JClass("com.bayesserver.License")
# License.validate("xxx")


# In this example we programatically create a Dynamic Bayesian network (time series).
# Note that you can automatically define nodes from data using
# classes in BayesServer.Data.Discovery,
# and you can automatically learn the parameters using classes in
# BayesServer.Learning.Parameters,
# however here we build a Bayesian network from scratch.

def nullable_double_array(xs):
    """ Helper function to convert to a java array of nullable doubles (java.lang.Double[]) """
    return [None if x is None else java.lang.Double(x) for x in xs]


def nullable_int(x):
    """ Helper function to convert an integer to a java nullable integer (java.lang.Integer) """
    return java.lang.Integer(x)


network = bayes.Network('DBN')

cluster1 = bayes.State('Cluster1')
cluster2 = bayes.State('Cluster2')
cluster3 = bayes.State('Cluster3')
varTransition = bayes.Variable('Transition', cluster1, cluster2, cluster3)
nodeTransition = bayes.Node(varTransition)

# make the node temporal, so that it appears in each time slice
nodeTransition.setTemporalType(bayes.TemporalType.TEMPORAL)

varObs1 = bayes.Variable('Obs1', bayes.VariableValueType.CONTINUOUS)
varObs2 = bayes.Variable('Obs2', bayes.VariableValueType.CONTINUOUS)
varObs3 = bayes.Variable('Obs3', bayes.VariableValueType.CONTINUOUS)
varObs4 = bayes.Variable('Obs4', bayes.VariableValueType.CONTINUOUS)
# observation node is a multi variable node, consisting of 4 continuous variables
nodeObservation = bayes.Node('Observation', [varObs1, varObs2, varObs3, varObs4])
nodeObservation.setTemporalType(bayes.TemporalType.TEMPORAL)

network.getNodes().add(nodeTransition)
network.getNodes().add(nodeObservation)

# link the transition node to the observation node within each time slice
network.getLinks().add(bayes.Link(nodeTransition, nodeObservation))

# add a temporal link of order 1.  This links the transition node to itself in the next time slice
network.getLinks().add(bayes.Link(nodeTransition, nodeTransition, 1))

# at this point the structural specification is complete

# now complete the distributions

# because the transition node has an incoming temporal link of order 1 (from itself), we must specify
# two distributions, the first of which is specified for time = 0

t0 = nullable_int(0)

cluster1Time0 = bayes.StateContext(cluster1, t0)
cluster2Time0 = bayes.StateContext(cluster2, t0)
cluster3Time0 = bayes.StateContext(cluster3, t0)

prior = nodeTransition.newDistribution(0).getTable()
prior.set(0.2, cluster1Time0)
prior.set(0.3, cluster2Time0)
prior.set(0.5, cluster3Time0)

# NewDistribution does not assign the new distribution, so it still must be assigned
nodeTransition.setDistribution(prior)

# // the second is specified for time >= 1
transition = nodeTransition.newDistribution(1).getTable()

# // when specifying temporal distributions, variables which belong to temporal nodes must have times associated
# // NOTE: Each time is specified relative to the current point in time which is defined as zero,
# // therefore the time for variables at the previous time step is -1

tMinus1 = nullable_int(-1)
cluster1TimeM1 = bayes.StateContext(cluster1, tMinus1)
cluster2TimeM1 = bayes.StateContext(cluster2, tMinus1)
cluster3TimeM1 = bayes.StateContext(cluster3, tMinus1)

transition.set(0.2, cluster1TimeM1, cluster1Time0)
transition.set(0.3, cluster1TimeM1, cluster2Time0)
transition.set(0.5, cluster1TimeM1, cluster3Time0)
transition.set(0.4, cluster2TimeM1, cluster1Time0)
transition.set(0.4, cluster2TimeM1, cluster2Time0)
transition.set(0.2, cluster2TimeM1, cluster3Time0)
transition.set(0.9, cluster3TimeM1, cluster1Time0)
transition.set(0.09, cluster3TimeM1, cluster2Time0)
transition.set(0.01, cluster3TimeM1, cluster3Time0)

# an alternative would be to set values using TableIterator.CopyFrom

# bayes_server.TableIterator(
#     transition,
#     [varTransition, varTransition],
#     [tMinus1, t0]
# ).copyFrom(
#     [0.2, 0.3, 0.5, 0.4, 0.4, 0.2, 0.9, 0.09, 0.01])

nodeTransition.getDistributions().set(1, transition)

# Node observation does not have any incoming temporal links, so
# only requires a distribution specified at time >=0
# Calling NewDistribution without specifying a time assumes time zero.
gaussian = nodeObservation.newDistribution()

# set the Gaussian parameters corresponding to the state "Cluster1" of variable "transition"

varObs1Time0 = bayes.VariableContext(varObs1, t0, bayes.HeadTail.HEAD)
varObs2Time0 = bayes.VariableContext(varObs2, t0, bayes.HeadTail.HEAD)
varObs3Time0 = bayes.VariableContext(varObs3, t0, bayes.HeadTail.HEAD)
varObs4Time0 = bayes.VariableContext(varObs4, t0, bayes.HeadTail.HEAD)

gaussian.setMean(varObs1Time0, 3.2, cluster1Time0)
gaussian.setMean(varObs2Time0, 2.4, cluster1Time0)
gaussian.setMean(varObs3Time0, -1.7, cluster1Time0)
gaussian.setMean(varObs4Time0, 6.2, cluster1Time0)

gaussian.setVariance(varObs1Time0, 2.3, cluster1Time0)
gaussian.setVariance(varObs2Time0, 2.1, cluster1Time0)
gaussian.setVariance(varObs3Time0, 3.2, cluster1Time0)
gaussian.setVariance(varObs4Time0, 1.4, cluster1Time0)

gaussian.setCovariance(varObs1Time0, varObs2Time0, -0.3, cluster1Time0)
gaussian.setCovariance(varObs1Time0, varObs3Time0, 0.5, cluster1Time0)
gaussian.setCovariance(varObs1Time0, varObs4Time0, 0.35, cluster1Time0)
gaussian.setCovariance(varObs2Time0, varObs3Time0, 0.12, cluster1Time0)
gaussian.setCovariance(varObs2Time0, varObs4Time0, 0.1, cluster1Time0)
gaussian.setCovariance(varObs3Time0, varObs4Time0, 0.23, cluster1Time0)

# set the Gaussian parameters corresponding to the state "Cluster2" of variable "transition"
gaussian.setMean(varObs1Time0, 3.0, cluster2Time0)
gaussian.setMean(varObs2Time0, 2.8, cluster2Time0)
gaussian.setMean(varObs3Time0, -2.5, cluster2Time0)
gaussian.setMean(varObs4Time0, 6.9, cluster2Time0)

gaussian.setVariance(varObs1Time0, 2.1, cluster2Time0)
gaussian.setVariance(varObs2Time0, 2.2, cluster2Time0)
gaussian.setVariance(varObs3Time0, 3.3, cluster2Time0)
gaussian.setVariance(varObs4Time0, 1.5, cluster2Time0)

gaussian.setCovariance(varObs1Time0, varObs2Time0, -0.4, cluster2Time0)
gaussian.setCovariance(varObs1Time0, varObs3Time0, 0.5, cluster2Time0)
gaussian.setCovariance(varObs1Time0, varObs4Time0, 0.45, cluster2Time0)
gaussian.setCovariance(varObs2Time0, varObs3Time0, 0.22, cluster2Time0)
gaussian.setCovariance(varObs2Time0, varObs4Time0, 0.15, cluster2Time0)
gaussian.setCovariance(varObs3Time0, varObs4Time0, 0.24, cluster2Time0)

# // set the Gaussian parameters corresponding to the state "Cluster3" of variable "transition"

gaussian.setMean(varObs1Time0, 3.8, cluster3Time0)
gaussian.setMean(varObs2Time0, 2.0, cluster3Time0)
gaussian.setMean(varObs3Time0, -1.9, cluster3Time0)
gaussian.setMean(varObs4Time0, 6.25, cluster3Time0)

gaussian.setVariance(varObs1Time0, 2.34, cluster3Time0)
gaussian.setVariance(varObs2Time0, 2.11, cluster3Time0)
gaussian.setVariance(varObs3Time0, 3.22, cluster3Time0)
gaussian.setVariance(varObs4Time0, 1.43, cluster3Time0)

gaussian.setCovariance(varObs1Time0, varObs2Time0, -0.31, cluster3Time0)
gaussian.setCovariance(varObs1Time0, varObs3Time0, 0.52, cluster3Time0)
gaussian.setCovariance(varObs1Time0, varObs4Time0, 0.353, cluster3Time0)
gaussian.setCovariance(varObs2Time0, varObs3Time0, 0.124, cluster3Time0)
gaussian.setCovariance(varObs2Time0, varObs4Time0, 0.15, cluster3Time0)
gaussian.setCovariance(varObs3Time0, varObs4Time0, 0.236, cluster3Time0)

nodeObservation.setDistribution(gaussian)

# optional check to validate network
network.validate(bayes.ValidationOptions())


# at this point the network has been fully specified

# we will now perform some queries on the network

inference = bayes_inference.RelevanceTreeInference(network)
queryOptions = bayes_inference.RelevanceTreeQueryOptions()
queryOutput = bayes_inference.RelevanceTreeQueryOutput()

# set some temporal evidence
inference.getEvidence().set(varObs1, nullable_double_array([2.2, 2.4, 2.6, 2.9]), 0, 0, 4)
inference.getEvidence().set(varObs2, nullable_double_array([None, 4.0, 4.1, 4.88]), 0, 0, 4)
inference.getEvidence().set(varObs3, nullable_double_array([-2.5, -2.3, None, -4.0]), 0, 0, 4)
inference.getEvidence().set(varObs4, nullable_double_array([4.0, 6.5, 4.9, 4.4]), 0, 0, 4)

queryOptions.setLogLikelihood(True)  # only ask for this if you really need it

# predict the observation variables one time step in the future
predict_time = nullable_int(4)

gaussian_future = []

for v in nodeObservation.getVariables():
    gaussian_future_v = bayes.CLGaussian(v, predict_time)
    gaussian_future.append(gaussian_future_v)
    inference.getQueryDistributions().add(gaussian_future_v)

# we will also demonstrate querying a joint distribution

jointFuture = bayes.CLGaussian(java.util.Arrays.asList([varObs1, varObs2]), predict_time)
inference.getQueryDistributions().add(jointFuture)


inference.query(queryOptions, queryOutput)  # note that this can raise an exception (see help for details)

print('LogLikelihood: ',  queryOutput.getLogLikelihood())
print()

for ix, gaussian in enumerate(gaussian_future):
    variableH = nodeObservation.getVariables().get(ix)
    print('P({}(t=4)|evidence)={}'.format(variableH.getName(), gaussian.getMean(variableH, predict_time)))


print()
print('P({},{}|evidence)='.format(varObs1.getName(), varObs2.getName()))
print('{}\t{}'.format(jointFuture.getMean(varObs1, predict_time),  jointFuture.getMean(varObs2, predict_time)))
print('{}\t{}'.format(jointFuture.getVariance(varObs1, predict_time), jointFuture.getCovariance(varObs1, predict_time, varObs2, predict_time)))
print('{}\t{}'.format(jointFuture.getCovariance(varObs2, predict_time, varObs1, predict_time), jointFuture.getVariance(varObs2, predict_time)))

shutdownJVM()

# Expected output...

# LogLikelihood: -26.3688322999762

# P(Obs1(t=4)|evidence)=3.33914912825023
# P(Obs2(t=4)|evidence)=2.38039739886759
# P(Obs3(t=4)|evidence)=-1.98416436694525
# P(Obs4(t=4)|evidence)=6.40822262492584

# P(Obs1,Obs2|evidence)=
# 3.33914912825023        2.38039739886759
# 2.36608725717058        -0.427500059391733
# -0.427500059391733      2.22592296205311