Skip to main content

Data sampling in Java

package com.bayesserver.examples;


import com.bayesserver.*;
import com.bayesserver.data.sampling.*;
import com.bayesserver.inference.*;

import java.util.Random;

public class DataSampling {

    /**
     * Builds the example network, fixes the Gender variable to "Female" and
     * prints 100 samples drawn from the network as tab-separated rows.
     *
     * @param args command-line arguments; not read by this example
     */
    public static void main(String[] args) {

        // The network is assembled in code below; it could just as well be loaded from a file.
        Network network = createNetwork();
        Variable gender = network.getVariables().get("Gender");
        Variable height = network.getVariables().get("Height");
        Variable hairLength = network.getVariables().get("Hair Length");

        // Evidence placed on 'fixedEvidence' is handed to the sampler.
        // In this example Gender is fixed to the state "Female".
        Evidence fixedEvidence = new DefaultEvidence(network);
        fixedEvidence.setState(gender.getStates().get("Female", true));

        // Set up the objects needed to draw samples from the Bayesian network.
        DataSampler sampler = new DataSampler(network, fixedEvidence);
        DataSamplingOptions options = new DataSamplingOptions();

        // To simulate missing data, enable the following line...
        // options.setMissingDataProbability(0.05); // set 5% of the data to missing

        // NOTE(review): the argument 0 is presumably a fixed seed giving repeatable output - confirm.
        RandomDefault random = new RandomDefault(0);

        // A single Evidence instance is reused as the buffer that receives every sample.
        Evidence sample = new DefaultEvidence(network);

        printHeader();

        // Emit 100 rows, one per sampling attempt.
        int remaining = 100;
        while (remaining-- > 0) {
            try {
                sampler.takeSample(sample, random, options);
                String row = String.format("%s\t%s\t%s",
                        valueAsText(gender, sample),
                        valueAsText(height, sample),
                        valueAsText(hairLength, sample));
                System.out.println(row);
            } catch (InconsistentEvidenceException inconsistent) {
                // Report the failed attempt and carry on with the next one.
                System.out.println("Inconsistent evidence exception was raised.");
            }
        }
    }

    /**
     * Writes the column titles and the separator line that precede the samples.
     */
    private static void printHeader() {
        System.out.println("Gender\tHeight\tHair Length");
        System.out.println("------------------------------");
    }

    /**
     * Renders the value held in {@code evidence} for {@code variable} as text.
     *
     * @param variable the variable whose value is wanted
     * @param evidence the evidence to read the value from
     * @return {@code "(null)"} when the evidence type for the variable is
     *         {@code EvidenceType.NONE}; the value formatted with two decimals
     *         for a continuous variable; otherwise the name of the state
     */
    private static String valueAsText(Variable variable, Evidence evidence) {
        if (EvidenceType.NONE == evidence.getEvidenceType(variable)) {
            return "(null)";
        }

        if (VariableValueType.CONTINUOUS != variable.getValueType()) {
            // Non-continuous variable: look the state up by the index stored in the evidence.
            return variable.getStates().get(evidence.getState(variable)).getName();
        }

        return String.format("%.2f", evidence.get(variable));
    }

    /**
     * Constructs the three-node example network: Gender is linked to both
     * Height (continuous) and Hair Length (discrete), and every node is given
     * a distribution before the network is validated.
     *
     * @return the fully specified network
     */
    private static Network createNetwork() {
        Network network = new Network();

        // Create the nodes up front, then register them in the order Gender, Height, Hair Length.
        Node nodeGender = new Node("Gender", new String[]{"Female", "Male"});
        Node nodeHeight = new Node("Height", VariableValueType.CONTINUOUS);
        Node nodeHairLength = new Node("Hair Length", new String[]{"Short", "Medium", "Long"});

        network.getNodes().add(nodeGender);
        network.getNodes().add(nodeHeight);
        network.getNodes().add(nodeHairLength);

        // Gender is the parent of both other nodes.
        network.getLinks().add(new Link(nodeGender, nodeHeight));
        network.getLinks().add(new Link(nodeGender, nodeHairLength));

        // The structure is complete at this point; what follows sets the parameters.

        // Gender: one probability per state.
        Table tableGender = nodeGender.newDistribution().getTable();
        tableGender.copyFrom(new double[]{0.51, 0.49});
        nodeGender.setDistribution(tableGender);

        // Hair Length given Gender, filled through an iterator over (Gender, Hair Length).
        // NOTE(review): presumably Gender is the outer index and Hair Length the inner one,
        // i.e. the first three values belong to "Female" - confirm against the TableIterator docs.
        Table tableHairLength = nodeHairLength.newDistribution().getTable();
        TableIterator iteratorHairLength =
                new TableIterator(tableHairLength, new Node[]{nodeGender, nodeHairLength});
        iteratorHairLength.copyFrom(new double[]{
                0.1, 0.4, 0.5,
                0.8, 0.15, 0.05});
        nodeHairLength.setDistribution(tableHairLength);

        // Height given Gender: a mean and a variance per Gender state.
        CLGaussian gaussianHeight = (CLGaussian) nodeHeight.newDistribution();

        // females
        gaussianHeight.setMean(0, 0, 162.56);
        gaussianHeight.setVariance(0, 0, 50.58);

        // males
        gaussianHeight.setMean(1, 0, 176.022);
        gaussianHeight.setVariance(1, 0, 50.58);

        nodeHeight.setDistribution(gaussianHeight);

        // Verify that the network has been specified correctly.
        network.validate(new ValidationOptions());

        return network;
    }
}