lift chart causes different performance values
Lara
New Altair Community Member
Good morning everybody,
I would like to evaluate my classification model using charts (e.g. a lift chart), but I get different performance values (e.g. accuracy) as soon as I add a lift chart operator, although the performance measurement operator should be independent of the chart-generating operator.
Why is this? Or is there a mistake in my process design?
Thank you very much for your help, Lara
(I have attached a simple example process that mirrors what I did with my own data.)
I would like to evaluate my classification model using charts (e.g. a lift chart), but I get different performance values (e.g. accuracy) as soon as I add a lift chart operator, although the performance measurement operator should be independent of the chart-generating operator.
Why is this? Or is there a mistake in my process design?
Thank you very much for your help, Lara
(I have attached a simple example process that mirrors what I did with my own data.)
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Example process (format version 5.0; presumably RapidMiner 5, TODO confirm).

Purpose: loads the Golf sample data, converts attribute types, duplicates the
example set and feeds one copy each into two validation operators,
"Validation" and "Validation (2)". Apart from operator names, layout
coordinates and the name under which the lift chart is stored ("lift" vs.
"lift2"), the two validation branches are configured identically: each trains
an SVM, applies it to the test set, measures classification performance and
builds a lift chart for target class "yes".

Results delivered at the process output:
result 1 / 2 / 3 = model, performance and lift chart of "Validation"
result 4 / 5 / 6 = model, performance and lift chart of "Validation (2)"
-->
<process version="5.0">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" expanded="true" name="Process">
<process expanded="true" height="591" width="1083">
<!-- Data loading and type conversion chain (wired in this order further down). -->
<operator activated="true" class="retrieve" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
<parameter key="repository_entry" value="//Samples/data/Golf"/>
</operator>
<operator activated="true" class="nominal_to_binominal" expanded="true" height="94" name="Nominal to Binominal" width="90" x="179" y="30"/>
<operator activated="true" class="nominal_to_numerical" expanded="true" height="94" name="Nominal to Numerical" width="90" x="313" y="30"/>
<!--
Second binominal conversion, restricted to the single attribute "Play".
include_special_attributes is enabled so that special attributes are eligible
for the conversion (presumably "Play" is the label; TODO confirm).
-->
<operator activated="true" class="nominal_to_binominal" expanded="true" height="94" name="Nominal to Binominal (2)" width="90" x="447" y="30">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Play"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<!-- Duplicates the prepared example set: output 1 goes to "Validation", output 2 to "Validation (2)". -->
<operator activated="true" class="multiply" expanded="true" height="94" name="Multiply" width="90" x="447" y="255"/>
<!-- Second validation branch (class x_validation, presumably cross-validation with default settings; none are overridden here). -->
<operator activated="true" class="x_validation" expanded="true" height="112" name="Validation (2)" width="90" x="648" y="255">
<!-- Training subprocess: the training data is passed to the SVM and the resulting model is returned. -->
<process expanded="true" height="753" width="530">
<operator activated="true" class="support_vector_machine" expanded="true" height="112" name="SVM (2)" width="90" x="220" y="30"/>
<connect from_port="training" to_op="SVM (2)" to_port="training set"/>
<connect from_op="SVM (2)" from_port="model" to_port="model"/>
<portSpacing port="source_training" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<!--
Testing subprocess: the model is applied to the test set and the labelled
data is duplicated by "Multiply (3)". Copy 1 goes to "Performance (2)",
copy 2 goes to "Create Lift Chart (2)", which also receives the model passed
through "Apply Model (2)". Only the performance vector leaves the subprocess;
the chart is stored under the name "lift2" and fetched later by "Recall (2)".
NOTE(review): "Remember (2)" sits inside the testing subprocess, so if this
subprocess runs once per validation fold the stored chart is presumably
overwritten each time and only the last one survives. Confirm.
-->
<process expanded="true" height="753" width="530">
<operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model (2)" width="90" x="45" y="30">
<list key="application_parameters"/>
</operator>
<operator activated="true" class="multiply" expanded="true" height="94" name="Multiply (3)" width="90" x="180" y="30"/>
<operator activated="true" class="create_lift_chart" expanded="true" height="94" name="Create Lift Chart (2)" width="90" x="179" y="165">
<parameter key="target_class" value="yes"/>
</operator>
<operator activated="true" class="remember" expanded="true" height="60" name="Remember (2)" width="90" x="313" y="165">
<parameter key="name" value="lift2"/>
<parameter key="io_object" value="LiftParetoChart"/>
</operator>
<operator activated="true" class="performance_classification" expanded="true" height="76" name="Performance (2)" width="90" x="380" y="30">
<list key="class_weights"/>
</operator>
<connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
<connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
<connect from_op="Apply Model (2)" from_port="labelled data" to_op="Multiply (3)" to_port="input"/>
<connect from_op="Apply Model (2)" from_port="model" to_op="Create Lift Chart (2)" to_port="model"/>
<connect from_op="Multiply (3)" from_port="output 1" to_op="Performance (2)" to_port="labelled data"/>
<connect from_op="Multiply (3)" from_port="output 2" to_op="Create Lift Chart (2)" to_port="example set"/>
<connect from_op="Create Lift Chart (2)" from_port="lift pareto chart" to_op="Remember (2)" to_port="store"/>
<connect from_op="Performance (2)" from_port="performance" to_port="averagable 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_averagable 1" spacing="0"/>
<portSpacing port="sink_averagable 2" spacing="0"/>
</process>
</operator>
<!-- First validation branch; same structure as "Validation (2)", with the chart stored under the name "lift". -->
<operator activated="true" class="x_validation" expanded="true" height="112" name="Validation" width="90" x="648" y="30">
<!-- Training subprocess: the training data is passed to the SVM and the resulting model is returned. -->
<process expanded="true" height="735" width="366">
<operator activated="true" class="support_vector_machine" expanded="true" height="112" name="SVM" width="90" x="246" y="30"/>
<connect from_port="training" to_op="SVM" to_port="training set"/>
<connect from_op="SVM" from_port="model" to_port="model"/>
<portSpacing port="source_training" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<!--
Testing subprocess: the model is applied to the test set and the labelled
data is duplicated by "Multiply (2)". Copy 1 goes to "Performance", copy 2
goes to "Create Lift Chart", which also receives the model passed through
"Apply Model". The chart is stored under the name "lift" and fetched later by
"Recall".
NOTE(review): as in the other branch, the stored chart is presumably
overwritten on every run of this subprocess. Confirm.
-->
<process expanded="true" height="735" width="547">
<operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model" width="90" x="112" y="30">
<list key="application_parameters"/>
</operator>
<operator activated="true" class="multiply" expanded="true" height="94" name="Multiply (2)" width="90" x="246" y="30"/>
<operator activated="true" class="create_lift_chart" expanded="true" height="94" name="Create Lift Chart" width="90" x="246" y="165">
<parameter key="target_class" value="yes"/>
</operator>
<operator activated="true" class="remember" expanded="true" height="60" name="Remember" width="90" x="380" y="165">
<parameter key="name" value="lift"/>
<parameter key="io_object" value="LiftParetoChart"/>
</operator>
<operator activated="true" class="performance_classification" expanded="true" height="76" name="Performance" width="90" x="447" y="30">
<list key="class_weights"/>
</operator>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Multiply (2)" to_port="input"/>
<connect from_op="Apply Model" from_port="model" to_op="Create Lift Chart" to_port="model"/>
<connect from_op="Multiply (2)" from_port="output 1" to_op="Performance" to_port="labelled data"/>
<connect from_op="Multiply (2)" from_port="output 2" to_op="Create Lift Chart" to_port="example set"/>
<connect from_op="Create Lift Chart" from_port="lift pareto chart" to_op="Remember" to_port="store"/>
<connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_averagable 1" spacing="0"/>
<portSpacing port="sink_averagable 2" spacing="0"/>
</process>
</operator>
<!-- Fetch the lift charts stored inside the two validation operators so they can be delivered as process results. -->
<operator activated="true" class="recall" expanded="true" height="60" name="Recall" width="90" x="782" y="120">
<parameter key="name" value="lift"/>
<parameter key="io_object" value="LiftParetoChart"/>
</operator>
<operator activated="true" class="recall" expanded="true" height="60" name="Recall (2)" width="90" x="782" y="390">
<parameter key="name" value="lift2"/>
<parameter key="io_object" value="LiftParetoChart"/>
</operator>
<!-- Top-level wiring: preprocessing chain, split into the two validation branches, then the six result ports. -->
<connect from_op="Retrieve" from_port="output" to_op="Nominal to Binominal" to_port="example set input"/>
<connect from_op="Nominal to Binominal" from_port="example set output" to_op="Nominal to Numerical" to_port="example set input"/>
<connect from_op="Nominal to Numerical" from_port="example set output" to_op="Nominal to Binominal (2)" to_port="example set input"/>
<connect from_op="Nominal to Binominal (2)" from_port="example set output" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="Validation" to_port="training"/>
<connect from_op="Multiply" from_port="output 2" to_op="Validation (2)" to_port="training"/>
<connect from_op="Validation (2)" from_port="model" to_port="result 4"/>
<connect from_op="Validation (2)" from_port="averagable 1" to_port="result 5"/>
<connect from_op="Validation" from_port="model" to_port="result 1"/>
<connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
<connect from_op="Recall" from_port="result" to_port="result 3"/>
<connect from_op="Recall (2)" from_port="result" to_port="result 6"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="54"/>
<portSpacing port="sink_result 4" spacing="54"/>
<portSpacing port="sink_result 5" spacing="0"/>
<portSpacing port="sink_result 6" spacing="162"/>
<portSpacing port="sink_result 7" spacing="0"/>
</process>
</operator>
</process>
0
Answers
-
Hi,
thank you for the hint and for the ready-to-execute process. I found that this was due to a bug in the lift Pareto chart generator, which adds a small amount of noise to the label instead of to the attributes. This will be fixed in the next update.
I don't know whether you have already noticed, but you are generating the chart ten times and overwriting it nine times. You might want to replace the XValidation operator with the XVPrediction operator and apply the chart generator after it.
Greetings,
Sebastian0 -
Hello Sebastian,
thank you very much for your help. I eagerly await the next update :-)
Yes, I had noticed that the lift chart gets overwritten, but until now I did not have a solution for this, so thank you for the hint.
Lara0