# Example: train a J48 classifier and predict labels for unlabeled data
# imports
import weka.classifiers.trees.J48 as J48
import weka.core.converters.ConverterUtils.DataSource as DS
import os
# Train a J48 classifier on the training set, then print a prediction for
# every instance of the unlabeled set (skipped if the headers do not match).

# Resolve the dataset directory once. Indexing os.environ (instead of .get)
# makes a missing MOOC_DATA fail with a KeyError naming the variable rather
# than an opaque "NoneType + str" TypeError.
dataDir = os.environ["MOOC_DATA"]

# load training data; the last attribute is used as the class attribute
data = DS.read(os.path.join(dataDir, "anneal_train.arff"))
data.setClassIndex(data.numAttributes() - 1)

# configure classifier
cls = J48()
# "-C 0.3": pruning confidence according to the Weka J48 documentation
cls.setOptions(["-C", "0.3"])

# build classifier on training data
cls.buildClassifier(data)

# load unlabeled data; class index is set the same way as for training data
dataUnl = DS.read(os.path.join(dataDir, "anneal_unlbl.arff"))
dataUnl.setClassIndex(dataUnl.numAttributes() - 1)

# test compatibility of train/unlabeled datasets
# (a None result is treated as "headers match")
msg = dataUnl.equalHeadersMsg(data)
if msg is not None:
    # Report the mismatch and do NOT predict: the classifier was built for a
    # different header, so its output on this data would be meaningless.
    print("train and prediction data are not compatible:\n" + msg)
else:
    # make predictions
    classAttr = dataUnl.classAttribute()  # loop-invariant, look up once
    for inst in dataUnl:
        dist = cls.distributionForInstance(inst)
        labelIndex = cls.classifyInstance(inst)
        # classifyInstance returns a numeric index; convert it to an int to
        # look up the corresponding class label string
        label = classAttr.value(int(labelIndex))
        print(str(dist) + " - " + str(labelIndex) + " - " + label)