# Data sampling in Python

# __author__ = 'Bayes Server'
# __version__= '0.1'

import pandas as pd
import jpype    # pip install jpype1    (version 1.2.1 or later)
import jpype.imports
from jpype.types import *

classpath = "C:\\Program Files\\Bayes Server\\Bayes Server 9.5\\API\\Java\\bayesserver-9.5.jar"

# Launch the JVM
jpype.startJVM(classpath=[classpath])

import data_frame_utils as dfu

# import the Java modules
from com.bayesserver import *
from com.bayesserver.inference import *
from com.bayesserver.learning.parameters import *
from com.bayesserver.data import *
from com.bayesserver.data.sampling import *
from jpype import java, JImplements, JOverride

# Uncomment the following line and change the license key, if you are using a licensed version
# License.validate("xxx")

def value_as_text(variable, evidence):
    """Return a printable representation of `variable`'s value in `evidence`.

    * If `evidence` holds no evidence for `variable` (evidence type is
      ``EvidenceType.NONE``), the placeholder ``"(null)"`` is returned.
    * For a continuous variable, the value stored in `evidence` is
      formatted as text.
    * Otherwise the name of the state that `evidence` records for the
      variable is returned.
    """
    has_no_value = evidence.getEvidenceType(variable) == EvidenceType.NONE
    if has_no_value:
        return "(null)"

    is_continuous = variable.getValueType() == VariableValueType.CONTINUOUS
    if not is_continuous:
        # Discrete case: translate the recorded state index into its name.
        state_index = evidence.getState(variable)
        state = variable.getStates().get(state_index)
        return state.getName()

    return f"{evidence.get(variable)}"


def create_network():
    """Build, parameterize and validate the example Bayesian network.

    The network has three nodes: "Gender" (states Female/Male), a
    continuous "Height", and "Hair Length" (states Short/Medium/Long).
    Gender is linked to both Height and Hair Length.

    Returns the validated ``Network`` instance.
    """
    network = Network()

    # --- structure -------------------------------------------------------
    gender_node = Node("Gender", ["Female", "Male"])
    height_node = Node("Height", VariableValueType.CONTINUOUS)
    hair_node = Node("Hair Length", ["Short", "Medium", "Long"])

    # Register the nodes in the same order as they were created.
    nodes = network.getNodes()
    for node in (gender_node, height_node, hair_node):
        nodes.add(node)

    # Gender is the first Link argument for both links.
    links = network.getLinks()
    for other_node in (height_node, hair_node):
        links.add(Link(gender_node, other_node))

    # --- parameters ------------------------------------------------------
    # Distribution over Gender: one probability per state.
    gender_table = gender_node.newDistribution().getTable()
    gender_table.copyFrom([0.51, 0.49])
    gender_node.setDistribution(gender_table)

    # Distribution over Hair Length; the iterator fixes the variable order
    # (Gender, Hair Length) in which the flat list of values is consumed.
    # NOTE(review): presumably the first three values belong to Female and
    # the last three to Male -- confirm against the TableIterator docs.
    hair_table = hair_node.newDistribution().getTable()
    TableIterator(hair_table, [gender_node, hair_node]).copyFrom(
        [0.1, 0.4, 0.5, 0.8, 0.15, 0.05]
    )
    hair_node.setDistribution(hair_table)

    # Gaussian for Height: a mean per gender state (0 = females,
    # 1 = males) with the same variance for both.
    height_gaussian = height_node.newDistribution()
    shared_variance = 50.58
    for gender_state, mean_height in ((0, 162.56), (1, 176.022)):
        height_gaussian.setMean(gender_state, 0, mean_height)
        height_gaussian.setVariance(gender_state, 0, shared_variance)
    height_node.setDistribution(height_gaussian)

    # Make sure the finished network is specified correctly before use.
    network.validate(ValidationOptions())

    return network

# The network is built in code here; it could equally be loaded from a file.
network = create_network()
variables = network.getVariables()
gender = variables.get("Gender")
height = variables.get("Height")
hairLength = variables.get("Hair Length")

# Evidence set on 'fixedEvidence' is passed to the sampler below; use it to
# pin selected variables.  In this example Gender is pinned to "Female".
fixedEvidence = DefaultEvidence(network)
female_state = gender.getStates().get("Female", True)
fixedEvidence.setState(female_state)

# Set up the sampler together with its options.
sampler = DataSampler(network, fixedEvidence)
options = DataSamplingOptions()

# To simulate missing data, enable the following line...
# options.setMissingDataProbability(0.05)  # set 5% of the data to missing

random = java.util.Random(0)  # seeded with 0
sample = DefaultEvidence(network)  # reused buffer that receives each sample

# Print a header followed by 100 samples, one per line.

print("Gender\tHeight\tHair Length")
print("------------------------------")

for _ in range(100):
    try:
        sampler.takeSample(sample, random, options)
        gender_text = value_as_text(gender, sample)
        height_text = value_as_text(height, sample)
        hair_length_text = value_as_text(hairLength, sample)
        print(f"{gender_text}\t{height_text}\t{hair_length_text}")
    except InconsistentEvidenceException:
        print("Inconsistent evidence exception was raised.")