# Variable generator (Add nodes from data) in Python

# __author__ = 'Bayes Server'
# __version__= '0.2'

import pandas as pd
import jpype # pip install jpype1 (version 1.2.1 or later)
import jpype.imports
from jpype.types import *
from jpype import java

# Location of the Bayes Server Java API jar.
# TODO download the Bayes Server Java API, and adjust the path
classpath = "lib/bayesserver-10.8.jar"

# Launch the JVM with the jar on its classpath. The com.bayesserver imports
# further down in this file come after this call.
jar_paths = [classpath]
jpype.startJVM(classpath=jar_paths)

import data_frame_utils as dfu

# import the Java modules
from com.bayesserver import *
from com.bayesserver.inference import *
from com.bayesserver.data import *
from com.bayesserver.data.discovery import *

# Uncomment the following line and change the license key, if you are using a licensed version
# License.validate("xxx")

# This example uses a Pandas DataFrame as the data source for adding variables to a Bayesian network
# You can also connect to databases using DatabaseDataReaderCommand

# NOTE that this can be used to define variables from data,
# but does not learn the structure or parameters of the network (see other examples for learning)

# Small in-memory sample: columns A and B hold numbers, C and D hold text labels.
d = {
    'A': [3.0, 10.0, 14.3, 12.2],
    'B': [16, 12, 3, 6],
    'C': ['A', 'B', 'A', 'B'],
    'D': ['X', 'Y', 'X', 'X'],
}
df = pd.DataFrame(d)

# Convert the DataFrame into the table object handed to
# DataTableDataReaderCommand later in the script.
dt = dfu.to_data_table(df)

# The network is created empty; generating variables below does not modify it.
network = Network()

# Reader command over the data table, plus default-constructed generator options.
data_reader_command = DataTableDataReaderCommand(dt)
options = VariableGeneratorOptions()

# Each definition receives the column label twice, followed by its value type.
column_types = (
    ('A', VariableValueType.CONTINUOUS),
    ('B', VariableValueType.CONTINUOUS),
    ('C', VariableValueType.DISCRETE),
    ('D', VariableValueType.DISCRETE),
)
variable_defs = [
    VariableDefinition(label, label, value_type)
    for label, value_type in column_types
]

# Hand the definitions to the generator as a Java list.
definitions_list = java.util.Arrays.asList(variable_defs)
variable_infos = VariableGenerator.generate(
    data_reader_command,
    definitions_list,
    options,
)

# Note that no variables have yet been added to the network,
# so this prints the variable count before any nodes exist.
print(network.getVariables().size())

# Wrap each generated variable in a node and add that node to the network.
# (The loop body must be indented; without it the script raises IndentationError.)
for vi in variable_infos:
    variable = vi.getVariable()
    network.getNodes().add(Node(variable))

# Print the variable count again, now that the nodes have been added.
print(network.getVariables().size())