Patch for LibSVM One Class Classification
Hi,
I made some changes to the code to enable libsvm-style (-1/1) classification based on nu for one-class SVMs instead of the confidence value. For backward compatibility I added a parameter to the LibSVMLearner where the desired one-class behavior can be selected. This feature was already discussed in:
http://rapid-i.com/rapidforum/index.php/topic,1599.0.html and http://rapid-i.com/rapidforum/index.php/topic,1596.0.html
Here is the patch for rev. 45:
Greetings, Harald
I made some changes to the code to enable libsvm-style (-1/1) classification based on nu for one-class SVMs instead of the confidence value. For backward compatibility I added a parameter to the LibSVMLearner where the desired one-class behavior can be selected. This feature was already discussed in:
http://rapid-i.com/rapidforum/index.php/topic,1599.0.html and http://rapid-i.com/rapidforum/index.php/topic,1596.0.html
Here is the patch for rev. 45:
And here is a little example which uses the functionality for X-Val:
Index: src/com/rapidminer/operator/learner/functions/kernel/LibSVMLearner.java
===================================================================
--- src/com/rapidminer/operator/learner/functions/kernel/LibSVMLearner.java (revision 45)
+++ src/com/rapidminer/operator/learner/functions/kernel/LibSVMLearner.java (working copy)
/** The parameter name for "Indicates if proper confidence values should be calculated." */
public static final String PARAMETER_CALCULATE_CONFIDENCES = "calculate_confidences";
+ /** The parameter name for "Indicates if the traditional libsvm one-class classification behavior should be used." */
+ public static final String PARAMETER_ONECLASS_CLASSIFICATION = "one_class_classification";
+
/** The parameter name for "Indicates if proper confidence values should be calculated." */
public static final String PARAMETER_CONFIDENCE_FOR_MULTICLASS = "confidence_for_multiclass";
svm_model model = Svm.svm_train(problem, params);
- return new LibSVMModel(exampleSet, model, exampleSet.getAttributes().size(), getParameterAsBoolean(PARAMETER_CONFIDENCE_FOR_MULTICLASS));
+ return new LibSVMModel(exampleSet, model, exampleSet.getAttributes().size(), getParameterAsBoolean(PARAMETER_CONFIDENCE_FOR_MULTICLASS), getParameterAsBoolean("one_class_classification"));
}
@Override
type.setExpert(false);
types.add(type);
types.add(new ParameterTypeBoolean(PARAMETER_CONFIDENCE_FOR_MULTICLASS, "Indicates if the class with the highest confidence should be selected in the multiclass setting. Uses binary majority vote over all 1-vs-1 classifiers otherwise (selected class must not be the one with highest confidence in that case).", true));
+
+ type = new ParameterTypeBoolean(PARAMETER_ONECLASS_CLASSIFICATION, "Indicates if a one-class model should predict the class of an example (integer label: 1 or -1) instead of returning the class confidence. By default a confidence is calculated which can be processed by threshold operators.", false);
+ type.setExpert(false);
+ type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_SVM_TYPE, SVM_TYPES, false, SVM_TYPE_ONE_CLASS));
+ types.add(type);
+
return types;
}
}
Index: src/com/rapidminer/operator/learner/functions/kernel/LibSVMModel.java
===================================================================
--- src/com/rapidminer/operator/learner/functions/kernel/LibSVMModel.java (revision 45)
+++ src/com/rapidminer/operator/learner/functions/kernel/LibSVMModel.java (working copy)
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.FastExample2SparseTransform;
+import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.learner.FormulaProvider;
+import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.Tools;
/**
private boolean confidenceForMultiClass = true;
- public LibSVMModel(ExampleSet exampleSet, svm_model model, int numberOfAttributes, boolean confidenceForMultiClass) {
+ private boolean oneClassClassification = false;
+
+ public LibSVMModel(ExampleSet exampleSet, svm_model model, int numberOfAttributes, boolean confidenceForMultiClass, boolean oneClassClassification) {
super(exampleSet);
this.model = model;
this.numberOfAttributes = numberOfAttributes;
this.confidenceForMultiClass = confidenceForMultiClass;
+ this.oneClassClassification = oneClassClassification;
}
@Override
confidenceAttributes = exampleSet.getAttributes().getSpecial(Attributes.CONFIDENCE_NAME + "_" + labelName);
}
}
-
-
+
if (label.isNominal() && (label.getMapping().size() == 1)) { // one class SVM
- double[] allConfidences = new double[exampleSet.size()];
+
int counter = 0;
- double maxConfidence = Double.NEGATIVE_INFINITY;
- double minConfidence = Double.POSITIVE_INFINITY;
Iterator<Example> i = exampleSet.iterator();
- while (i.hasNext()) {
- Example e = i.next();
- svm_node[] currentNodes = LibSVMLearner.makeNodes(e, ripper);
+
+ if (oneClassClassification) {
+ // classification behavior
+ String name = predictedLabel.getName();
+ Attribute newLabel = AttributeFactory.createAttribute(name, Ontology.INTEGER);
+ newLabel.clearTransformations();
- double[] prob = new double[1];
- Svm.svm_predict_values(model, currentNodes, prob);
- allConfidences[counter++] = prob[0];
- minConfidence = Math.min(minConfidence, prob[0]);
- maxConfidence = Math.max(maxConfidence, prob[0]);
+ exampleSet.getExampleTable().removeAttribute(predictedLabel);
+ exampleSet.getExampleTable().addAttribute(newLabel);
+ exampleSet.getAttributes().setPredictedLabel(newLabel);
+
+ while (i.hasNext()) {
+ Example e = i.next();
+ svm_node[] currentNodes = LibSVMLearner.makeNodes(e, ripper);
+ e.setPredictedLabel((int) Svm.svm_predict(model, currentNodes));
+ }
+ } else {
+ // classic behavior
+ double[] allConfidences = new double[exampleSet.size()];
+ double maxConfidence = Double.NEGATIVE_INFINITY;
+ double minConfidence = Double.POSITIVE_INFINITY;
+
+ while (i.hasNext()) {
+ Example e = i.next();
+ svm_node[] currentNodes = LibSVMLearner.makeNodes(e, ripper);
+
+ double[] prob = new double[1];
+ Svm.svm_predict_values(model, currentNodes, prob);
+ allConfidences[counter++] = prob[0];
+ minConfidence = Math.min(minConfidence, prob[0]);
+ maxConfidence = Math.max(maxConfidence, prob[0]);
+ }
+
+ counter = 0;
+ String className = predictedLabel.getMapping().mapIndex(0);
+
+ i = exampleSet.iterator();
+
+ while (i.hasNext()) {
+ Example e = i.next();
+ e.setValue(predictedLabel, 0);
+ e.setConfidence(className, (allConfidences[counter++] - minConfidence) / (maxConfidence - minConfidence));
+ }
}
-
- counter = 0;
- String className = predictedLabel.getMapping().mapIndex(0);
- i = exampleSet.iterator();
- while (i.hasNext()) {
- Example e = i.next();
- e.setValue(predictedLabel, 0);
- e.setConfidence(className, (allConfidences[counter++] - minConfidence) / (maxConfidence - minConfidence));
- }
} else {
Iterator<Example> i = exampleSet.iterator();
while (i.hasNext()) {
The returned prediction values are integers, either 1 (same class) or -1 (outlier), taken directly from libsvm. The real labels cannot be used because one-class models are trained with only one type of data. Therefore some processing (numerical2nominal, map) is necessary to allow performance evaluation. It's not nice, but you can see its usage in the example. Please feel free to use the code as you like!
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
<context>
<input>
<location/>
</input>
<output>
<location/>
<location/>
</output>
<macros/>
</context>
<operator activated="true" class="process" expanded="true" name="Process">
<process expanded="true" height="-20" width="-50">
<operator activated="true" class="generate_data" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
<parameter key="target_function" value="two gaussians classification"/>
<parameter key="number_examples" value="500"/>
<parameter key="number_of_attributes" value="8"/>
<parameter key="attributes_lower_bound" value="0.0"/>
<parameter key="attributes_upper_bound" value="1.0"/>
<parameter key="use_local_random_seed" value="true"/>
<parameter key="local_random_seed" value="85"/>
</operator>
<operator activated="true" class="x_validation" expanded="true" height="112" name="Validation" width="90" x="313" y="30">
<process expanded="true">
<operator activated="true" class="filter_examples" expanded="true" height="76" name="Filter Examples" width="90" x="45" y="30">
<parameter key="condition_class" value="attribute_value_filter"/>
<parameter key="parameter_string" value="label=cluster1"/>
</operator>
<operator activated="true" class="select_attributes" expanded="true" height="76" name="Select Attributes" width="90" x="179" y="30">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="label"/>
<parameter key="invert_selection" value="true"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<operator activated="true" class="generate_attributes" expanded="true" height="76" name="Generate Attributes" width="90" x="45" y="120">
<list key="function_descriptions">
<parameter key="label" value="&quot;cluster1&quot;"/>
</list>
</operator>
<operator activated="true" class="set_role" expanded="true" height="76" name="Set Role" width="90" x="180" y="120">
<parameter key="name" value="label"/>
<parameter key="target_role" value="label"/>
</operator>
<operator activated="true" class="support_vector_machine_libsvm" expanded="true" height="76" name="SVM" width="90" x="313" y="30">
<parameter key="svm_type" value="one-class"/>
<parameter key="coef0" value="3.0"/>
<parameter key="nu" value="0.4"/>
<list key="class_weights"/>
<parameter key="one_class_classification" value="true"/>
</operator>
<connect from_port="training" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="SVM" to_port="training set"/>
<connect from_op="SVM" from_port="model" to_port="model"/>
<portSpacing port="source_training" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true">
<operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
<list key="application_parameters"/>
</operator>
<operator activated="true" class="numerical_to_polynominal" expanded="true" height="76" name="Numerical to Polynominal" width="90" x="179" y="30">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="prediction(label)"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<operator activated="true" class="map" expanded="true" height="76" name="Map" width="90" x="313" y="30">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="prediction(label)"/>
<parameter key="include_special_attributes" value="true"/>
<list key="value_mappings">
<parameter key="-1" value="cluster0"/>
<parameter key="1" value="cluster1"/>
</list>
</operator>
<operator activated="true" class="performance" expanded="true" height="76" name="Performance" width="90" x="447" y="30"/>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Numerical to Polynominal" to_port="example set input"/>
<connect from_op="Numerical to Polynominal" from_port="example set output" to_op="Map" to_port="example set input"/>
<connect from_op="Map" from_port="example set output" to_op="Performance" to_port="labelled data"/>
<connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_averagable 1" spacing="0"/>
<portSpacing port="sink_averagable 2" spacing="0"/>
</process>
</operator>
<connect from_op="Generate Data" from_port="output" to_op="Validation" to_port="training"/>
<connect from_op="Validation" from_port="averagable 1" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Greetings, Harald