# Parameter learning in Python
#
# This example makes use of the Python code in Data Frame Utils.

# __author__ = 'Bayes Server'
# __version__= '0.4'

import pandas as pd
import jpype # pip install jpype1 (version 1.2.1 or later)
import jpype.imports
from jpype.types import *

classpath = "lib/bayesserver-10.8.jar" # TODO download the Bayes Server Java API, and adjust the path

# Launch the JVM
jpype.startJVM(classpath=[classpath])

import data_frame_utils as dfu

# import the Java modules
from com.bayesserver import *
from com.bayesserver.inference import *
from com.bayesserver.learning.parameters import *
from com.bayesserver.data import *
from jpype import java, JImplements, JOverride

# Uncomment the following line and change the license key, if you are using a licensed version
# License.validate("xxx")


@JImplements(ParameterLearningProgress)
class ConsoleParameterLearningProgress(object):
    """jpype implementation of the Java ParameterLearningProgress interface
    that reports learning progress to the console.

    See ParameterLearningProgress in the Bayes Server .NET or Java API docs
    for details of the interface contract.
    """

    @JOverride
    def update(self, info):
        # Called periodically by the learning algorithm with the current
        # iteration count and log-likelihood.
        print('Iterations: ', info.getIterationCount(), ', log-likelihood: ', info.getLogLikelihood())

    @JOverride
    def getDistributionMonitoring(self):
        # No per-distribution monitoring is requested in this example.
        return DistributionMonitoring.NONE


def train_network():
    """
    Learn the parameters of a Bayesian network from data.

    Builds the mixture-model network structure, maps its variables to the
    columns of a CSV data set, then runs Bayes Server parameter learning,
    reporting progress to the console and printing the final log-likelihood.
    """
    # We manually construct the network here, but it could be loaded from a file.
    network = create_network_structure()
    var_x = network.getVariables().get('X', True)
    var_y = network.getVariables().get('Y', True)

    # Now learn the parameters from the data in Tutorial 2 - Mixture model.
    # Note: saved as csv from the Tutorial data installed with Bayes Server.
    frame = pd.read_csv('data/mixture_model.csv')
    data_table = dfu.to_data_table(frame)

    # Use the RelevanceTree algorithm, as it is optimized for parameter learning.
    learning = ParameterLearning(network, RelevanceTreeInferenceFactory())

    options = ParameterLearningOptions()
    options.setProgress(ConsoleParameterLearningProgress())

    # We do not have a case column or weight column in this example.
    reader_options = ReaderOptions(None, None)

    # Map variables to database columns; in this case the variables and the
    # database columns share the same names.
    # Note that although this example only has non-temporal data, additional
    # temporal variables and data could have been included.
    references = java.util.Arrays.asList([
        VariableReference(var_x, ColumnValueType.VALUE, var_x.getName()),
        VariableReference(var_y, ColumnValueType.VALUE, var_y.getName()),
    ])

    evidence_reader_command = DefaultEvidenceReaderCommand(
        DataTableDataReaderCommand(data_table),
        references,
        reader_options)

    result = learning.learn(evidence_reader_command, options)

    print('Log likelihood = ' + str(result.getLogLikelihood()))


def create_network_structure():
    """
    Build the mixture-model network structure.

    A discrete 'Cluster' node (3 states) is linked as a parent of a
    'Position' node holding two continuous variables, X and Y. Parameters
    are left at their defaults, to be learned later from data.

    Returns:
        The constructed Bayes Server Network instance.
    """
    network = Network()

    cluster = Node('Cluster', ['Cluster1', 'Cluster2', 'Cluster3'])
    network.getNodes().add(cluster)

    x = Variable('X', VariableValueType.CONTINUOUS)
    y = Variable('Y', VariableValueType.CONTINUOUS)
    position = Node('Position', JArray(Variable)([x, y]))
    network.getNodes().add(position)

    # Cluster is a parent of Position.
    network.getLinks().add(Link(cluster, position))

    # At this point the Bayesian network structure is fully specified.
    return network


train_network()