Difference in classification accuracy (performance vector) for the same input -
DavidRaju
New Altair Community Member
Could you please clarify,
Why am I getting different accuracies (performance vector results) for the same input supplied to two similar models?
see my next post with similar kind of problem
Why am I getting different accuracies (performance vector results) for the same input supplied to two similar models?
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner 5.2.008 process.
  Pipeline: Retrieve the Sonar sample data, pass it through Multiply, drop the
  listed attributes (Select Attributes with an inverted subset filter), copy the
  result with Multiply (2), and feed each copy into its own X-Validation.
  Both X-Validations train a Decision Tree and score it with
  Apply Model + Performance (Classification).
  Neither X-Validation sets any parameter here, so both run with the operator's
  default settings.
  NOTE(review): the replies in this thread attribute the differing accuracies to
  the random splits made by X-Validation; presumably setting the same local
  random seed on both validations makes them agree. Confirm the parameter names
  against the RapidMiner operator reference.
-->
<process version="5.2.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
    <process expanded="true" height="467" width="748">
      <!-- Data source and attribute filtering -->
      <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
        <parameter key="repository_entry" value="//Samples/data/Sonar"/>
      </operator>
      <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="76" name="Multiply" width="90" x="45" y="120"/>
      <operator activated="true" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes (2)" width="90" x="179" y="75">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="attribute_14|attribute_15|attribute_16|attribute_17|attribute_18|attribute_19|attribute_2|attribute_22|attribute_23|attribute_24|attribute_25|attribute_26|attribute_27|attribute_28|attribute_29|attribute_3|attribute_30|attribute_31|attribute_32|attribute_33|attribute_34|attribute_38|attribute_39|attribute_40|attribute_41|attribute_42|attribute_50|attribute_53|attribute_55|attribute_56|attribute_57|attribute_58|attribute_59|attribute_60|attribute_7|attribute_6|attribute_8|attribute_35|attribute_20|attribute_5|attribute_54"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply (2)" width="90" x="246" y="165"/>

      <!-- First validation: Decision Tree (2) -->
      <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (2)" width="90" x="380" y="30">
        <!-- Training subprocess -->
        <process expanded="true" height="414" width="346">
          <operator activated="true" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (2)" width="90" x="132" y="30"/>
          <connect from_port="training" to_op="Decision Tree (2)" to_port="training set"/>
          <connect from_op="Decision Tree (2)" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess -->
        <process expanded="true" height="414" width="346">
          <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (2)" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (2)" width="90" x="200" y="30">
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
          <connect from_op="Performance (2)" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>

      <!-- Second validation: Decision Tree (3), fed from the other Multiply (2) output -->
      <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (3)" width="90" x="380" y="165">
        <!-- Training subprocess -->
        <process expanded="true" height="396" width="346">
          <operator activated="true" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (3)" width="90" x="132" y="30"/>
          <connect from_port="training" to_op="Decision Tree (3)" to_port="training set"/>
          <connect from_op="Decision Tree (3)" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess -->
        <process expanded="true" height="396" width="346">
          <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (3)" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (3)" width="90" x="200" y="30">
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model (3)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (3)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Performance (3)" to_port="labelled data"/>
          <connect from_op="Performance (3)" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>

      <!-- Top-level wiring -->
      <connect from_op="Retrieve" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Validation (2)" to_port="training"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="Validation (3)" to_port="training"/>
      <connect from_op="Validation (2)" from_port="training" to_port="result 1"/>
      <connect from_op="Validation (2)" from_port="averagable 1" to_port="result 2"/>
      <connect from_op="Validation (3)" from_port="training" to_port="result 3"/>
      <connect from_op="Validation (3)" from_port="averagable 1" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
    </process>
  </operator>
</process>
see my next post with similar kind of problem
Tagged:
0
Answers
-
contd...
Furthermore, why is the accuracy (performance vector) of validation operator 1 (with Decision Tree), when it runs in the same process as validation operator 2 (with k-NN), not the same as the accuracy of validation operator 1 (with Decision Tree) when it runs on its own?
Process 1 -
i/p A - validation operator (DT) - o/p (performance vector) = X,
i/p A - validation operator (kNN) - o/p (performance vector) = Y
Process 2-
i/p A - validation operator (DT) - o/p (performance vector) = Z (This should be X)
where X, Y, and Z are some numeric values.
The code for both processes is given below.
Process1
process 2:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner 5.2.008 process.
  Pipeline: Retrieve the Sonar sample data, drop the listed attributes
  (Select Attributes with an inverted subset filter), copy the result with
  Multiply (2), and feed the copies into two X-Validations:
    Validation (3) trains a Decision Tree,
    Validation (4) trains k-NN (its Decision Tree (4) is deactivated and unconnected).
  Neither X-Validation sets any parameter here, so both run with the operator's
  default settings.
  NOTE(review): the replies in this thread attribute run-to-run accuracy
  differences to the random splits made by X-Validation; presumably a local
  random seed on each validation makes the results reproducible. Confirm
  against the RapidMiner operator reference.
-->
<process version="5.2.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
    <process expanded="true" height="467" width="681">
      <!-- Data source and attribute filtering -->
      <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
        <parameter key="repository_entry" value="//Samples/data/Sonar"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes (2)" width="90" x="179" y="75">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="attribute_14|attribute_15|attribute_16|attribute_17|attribute_18|attribute_19|attribute_2|attribute_22|attribute_23|attribute_24|attribute_25|attribute_26|attribute_27|attribute_28|attribute_29|attribute_3|attribute_30|attribute_31|attribute_32|attribute_33|attribute_34|attribute_38|attribute_39|attribute_40|attribute_41|attribute_42|attribute_50|attribute_53|attribute_55|attribute_56|attribute_57|attribute_58|attribute_59|attribute_60|attribute_7|attribute_6|attribute_8|attribute_35|attribute_20|attribute_5|attribute_54"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply (2)" width="90" x="246" y="210"/>

      <!-- k-NN validation -->
      <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (4)" width="90" x="380" y="210">
        <!-- Training subprocess: only k-NN is active and wired -->
        <process expanded="true" height="414" width="346">
          <operator activated="false" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (4)" width="90" x="179" y="300"/>
          <operator activated="true" class="k_nn" compatibility="5.2.008" expanded="true" height="76" name="k-NN" width="90" x="179" y="30"/>
          <connect from_port="training" to_op="k-NN" to_port="training set"/>
          <connect from_op="k-NN" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess -->
        <process expanded="true" height="414" width="346">
          <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (4)" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (4)" width="90" x="200" y="30">
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model (4)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (4)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (4)" from_port="labelled data" to_op="Performance (4)" to_port="labelled data"/>
          <connect from_op="Performance (4)" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>

      <!-- Decision Tree validation -->
      <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (3)" width="90" x="380" y="30">
        <!-- Training subprocess -->
        <process expanded="true" height="414" width="346">
          <operator activated="true" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (3)" width="90" x="132" y="30"/>
          <connect from_port="training" to_op="Decision Tree (3)" to_port="training set"/>
          <connect from_op="Decision Tree (3)" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess -->
        <process expanded="true" height="414" width="346">
          <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (3)" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (3)" width="90" x="200" y="30">
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model (3)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (3)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Performance (3)" to_port="labelled data"/>
          <connect from_op="Performance (3)" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>

      <!-- Top-level wiring -->
      <connect from_op="Retrieve" from_port="output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Validation (3)" to_port="training"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="Validation (4)" to_port="training"/>
      <connect from_op="Validation (4)" from_port="averagable 1" to_port="result 3"/>
      <connect from_op="Validation (3)" from_port="training" to_port="result 1"/>
      <connect from_op="Validation (3)" from_port="averagable 1" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>
Why does the result change from process to process with the same input and model?
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner 5.2.008 process.
  Pipeline: Retrieve the Sonar sample data, pass it through Multiply, drop the
  listed attributes (Select Attributes with an inverted subset filter), and run
  a single X-Validation that trains a Decision Tree and scores it with
  Apply Model + Performance (Classification).
  The X-Validation sets no parameter here, so it runs with the operator's
  default settings.
  NOTE(review): the replies in this thread attribute run-to-run accuracy
  differences to the random splits made by X-Validation; presumably a local
  random seed makes this result reproducible. Confirm against the RapidMiner
  operator reference.
-->
<process version="5.2.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
    <process expanded="true" height="467" width="681">
      <!-- Data source and attribute filtering -->
      <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
        <parameter key="repository_entry" value="//Samples/data/Sonar"/>
      </operator>
      <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="76" name="Multiply" width="90" x="45" y="165"/>
      <operator activated="true" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes (2)" width="90" x="179" y="165">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="attribute_14|attribute_15|attribute_16|attribute_17|attribute_18|attribute_19|attribute_2|attribute_22|attribute_23|attribute_24|attribute_25|attribute_26|attribute_27|attribute_28|attribute_29|attribute_3|attribute_30|attribute_31|attribute_32|attribute_33|attribute_34|attribute_38|attribute_39|attribute_40|attribute_41|attribute_42|attribute_50|attribute_53|attribute_55|attribute_56|attribute_57|attribute_58|attribute_59|attribute_60|attribute_7|attribute_6|attribute_8|attribute_35|attribute_20|attribute_5|attribute_54"/>
        <parameter key="invert_selection" value="true"/>
      </operator>

      <!-- Decision Tree validation -->
      <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (3)" width="90" x="380" y="120">
        <!-- Training subprocess -->
        <process expanded="true" height="414" width="346">
          <operator activated="true" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (3)" width="90" x="132" y="30"/>
          <connect from_port="training" to_op="Decision Tree (3)" to_port="training set"/>
          <connect from_op="Decision Tree (3)" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess -->
        <process expanded="true" height="414" width="346">
          <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (3)" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (3)" width="90" x="200" y="30">
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model (3)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (3)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Performance (3)" to_port="labelled data"/>
          <connect from_op="Performance (3)" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>

      <!-- Top-level wiring -->
      <connect from_op="Retrieve" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Validation (3)" to_port="training"/>
      <connect from_op="Validation (3)" from_port="training" to_port="result 1"/>
      <connect from_op="Validation (3)" from_port="averagable 1" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
My work has come to a halt because of this ambiguity in the results.
Please clarify.
Thanking you in anticipation.
0 -
Random seed?
0
-
Wessel is right: X-Validation uses random splits, which differ when the operator is executed twice in a row. Set a local random seed on the X-Validation operators to force the same splits for both of them.
Furthermore, I strongly suggest updating from RapidMiner 5.2.8, which is years old, to the current version 5.3.13.
If you have any further questions, please come back!
Best regards,
Marius
0 -
Are there any free online books where I can learn more about these parameters in order to get efficient results?
0
-
There is a Weka book, and I think a RapidMiner book will be published soon.
weka book:
http://www.cs.waikato.ac.nz/ml/weka/book.html
rapid miner book:
http://rapidminerbook.com/
0 -
Thank you, I will try.
0