A set of Kotlin extensions for Weka. The goal is to make the use of Weka more convenient and idiomatic.
The full API documentation is available here.
Available extensions:
- Loading Data
- Getting/Setting Data
- Using Filters
- Create Holdout Split
- Perform Holdout Evaluation
- Perform Cross Validation
To add Weka-Kt to your project, add the following dependency to your build configuration:
Maven
<dependency>
<groupId>com.github.steven-lang</groupId>
<artifactId>weka-kt</artifactId>
<version>0.0.7</version>
</dependency>
Gradle
compile 'com.github.steven-lang:weka-kt:0.0.7'
The following shows a comparison of standard Weka procedures and their alternatives using Weka-Kt.
Java
ArffLoader loader = new ArffLoader();
loader.setFile(new File("src/test/resources/datasets/iris.arff"));
Instances irisData = loader.getDataSet();
irisData.setClassIndex(4);
Kotlin
var irisData = Instances("src/test/resources/datasets/iris.arff", classIndex = 4)
Java
// Get row
Instance row = irisData.get(5);
// Get value
double valueByIndex = irisData.get(5).value(3);
// Get attribute
Attribute attribute = irisData.attribute(3);
// Get value by attribute
double valueByAttribute = row.value(attribute);
// Set row
irisData.set(6, row);
// Set value at index (6,3)
Instance editRow = irisData.get(6);
editRow.setValue(3, 100.0);
irisData.set(6, editRow);
Kotlin
// Get row
val row = irisData[5]
// Get value
val valueByIndex = irisData[5, 3]
// Get attribute
val attribute = irisData.attributes[3]
// Get value by attribute
val valueByAttribute = row[attribute]
// Set row
irisData[6] = row
// Set value at index (6,3)
irisData[6, 3] = 100.0
Kotlin (slices)
// Get rows 2-20
val rowSubset = iris[2..20]
// Get rows 2-20 (explicit attribute selection with <ALL>)
val rowSubsetEq = iris[2..20, ALL]
// Get all rows and only columns 1-2
val attributeSubset = iris[ALL, 1..2]
// Get rows 2-20 and columns 1-2
val subset = iris[2..20, 1..2]
Java
// Filter percentage
RemovePercentage removePercentage = new RemovePercentage();
removePercentage.setPercentage(20.0);
removePercentage.setInvertSelection(true);
removePercentage.setInputFormat(irisData);
irisData = Filter.useFilter(irisData, removePercentage);
// Filter attributes
Remove remove = new Remove();
remove.setAttributeIndices("1,2");
remove.setInvertSelection(false);
remove.setInputFormat(irisData);
irisData = Filter.useFilter(irisData, remove);
// Resample data
Resample resample = new Resample();
resample.setRandomSeed(42);
resample.setNoReplacement(false);
resample.setSampleSizePercent(66.0);
resample.setInputFormat(irisData);
irisData = Filter.useFilter(irisData, resample);
Kotlin
// Filter percentage
irisData = irisData.filter(RemovePercentage()) {
percentage = 20.0
invertSelection = true
}
// Filter attributes
irisData = irisData.filter(Remove()) {
attributeIndices = "1,2"
invertSelection = false
}
// Resample data
irisData = irisData.filter(Resample()) {
noReplacement = false
randomSeed = 42
sampleSizePercent = 66.0
}
Kotlin (Chaining Filters)
// Chain Filters
irisData = irisData.filter(RemovePercentage()) { // Filter percentage
percentage = 20.0
invertSelection = true
}.filter(Remove()) { // Filter attributes
attributeIndices = "1,2"
invertSelection = false
}.filter(Resample()) { // Resample data
noReplacement = false
randomSeed = 42
sampleSizePercent = 66.0
}
Java
// Create filter for train set
RemovePercentage removePercentageTrain = new RemovePercentage();
removePercentageTrain.setPercentage(33.0);
removePercentageTrain.setInputFormat(irisData);
// Create filter for test set
RemovePercentage removePercentageTest = new RemovePercentage();
removePercentageTest.setPercentage(33.0);
removePercentageTest.setInvertSelection(true);
removePercentageTest.setInputFormat(irisData);
// Use filters and generate train/test sets
Instances train = Filter.useFilter(irisData, removePercentageTrain);
Instances test = Filter.useFilter(irisData, removePercentageTest);
Kotlin
val (train, test) = irisData.split(testPercentage = 33.0)
Java
// Build classifier first
clf.buildClassifier(train);
// Evaluate model
Evaluation eval = new Evaluation(train);
eval.evaluateModel(clf, test);
System.out.println(eval.toSummaryString());
Kotlin
val eval = clf.evaluateHoldout(trainData = train, testData = test)
println(eval.toSummaryString())
Kotlin (Implicit Split)
...
val eval = clf.evaluateHoldout(data = data, testPercentage = 33.0)
Java
// Create cross validation
int numFolds = 10;
Random rand = new Random(1);
Evaluation eval = new Evaluation(irisData);
eval.crossValidateModel(clf, irisData, numFolds, rand);
System.out.println(eval.toSummaryString());
Kotlin
// Create cross validation
val eval = clf.evaluateCrossValidation(data = irisData, numFolds = 10, seed = 1)
println(eval.toSummaryString())
Java
String[] options = Utils.splitOptions("-M 5 -U");
clf.setOptions(options);
Kotlin
clf.setOptions("-M 5 -U")