Difference in classification accuracy (performance vector) for the same input

DavidRaju
DavidRaju New Altair Community Member
edited November 5 in Community Q&A
Could you please clarify why I'm getting different accuracies (performance vector results) for the same input supplied to two identical models?

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.2.008">
 <!-- RapidMiner process: retrieves //Samples/data/Sonar, filters its attributes, and routes
      the same filtered example set into two X-Validation operators that share the same inner
      structure (Decision Tree, Apply Model, Performance). -->
 <context>
   <input/>
   <output/>
   <macros/>
 </context>
 <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
   <process expanded="true" height="467" width="748">
     <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
       <parameter key="repository_entry" value="//Samples/data/Sonar"/>
     </operator>
     <!-- Only "output 1" of this Multiply is connected in the wiring below. -->
     <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="76" name="Multiply" width="90" x="45" y="120"/>
     <!-- Subset filter with invert_selection=true; presumably the listed attributes are
          excluded rather than kept (TODO confirm against the operator documentation). -->
     <operator activated="true" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes (2)" width="90" x="179" y="75">
       <parameter key="attribute_filter_type" value="subset"/>
       <parameter key="attributes" value="attribute_14|attribute_15|attribute_16|attribute_17|attribute_18|attribute_19|attribute_2|attribute_22|attribute_23|attribute_24|attribute_25|attribute_26|attribute_27|attribute_28|attribute_29|attribute_3|attribute_30|attribute_31|attribute_32|attribute_33|attribute_34|attribute_38|attribute_39|attribute_40|attribute_41|attribute_42|attribute_50|attribute_53|attribute_55|attribute_56|attribute_57|attribute_58|attribute_59|attribute_60|attribute_7|attribute_6|attribute_8|attribute_35|attribute_20|attribute_5|attribute_54"/>
       <parameter key="invert_selection" value="true"/>
     </operator>
     <!-- Fans the filtered set out: "output 1" feeds Validation (2), "output 2" feeds Validation (3). -->
     <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply (2)" width="90" x="246" y="165"/>
     <!-- First validation. No parameters are set on this operator, so no local random seed is
          configured here. NOTE(review): if the folds are drawn randomly, this operator and
          Validation (3) can receive different splits; confirm. -->
     <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (2)" width="90" x="380" y="30">
       <!-- Training subprocess: learns a Decision Tree from the "training" port and delivers the model. -->
       <process expanded="true" height="414" width="346">
         <operator activated="true" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (2)" width="90" x="132" y="30"/>
         <connect from_port="training" to_op="Decision Tree (2)" to_port="training set"/>
         <connect from_op="Decision Tree (2)" from_port="model" to_port="model"/>
         <portSpacing port="source_training" spacing="0"/>
         <portSpacing port="sink_model" spacing="0"/>
         <portSpacing port="sink_through 1" spacing="0"/>
       </process>
       <!-- Testing subprocess: applies the model to the "test set" port and passes the
            classification performance to "averagable 1". -->
       <process expanded="true" height="414" width="346">
         <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (2)" width="90" x="45" y="30">
           <list key="application_parameters"/>
         </operator>
         <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (2)" width="90" x="200" y="30">
           <list key="class_weights"/>
         </operator>
         <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
         <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
         <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
         <connect from_op="Performance (2)" from_port="performance" to_port="averagable 1"/>
         <portSpacing port="source_model" spacing="0"/>
         <portSpacing port="source_test set" spacing="0"/>
         <portSpacing port="source_through 1" spacing="0"/>
         <portSpacing port="sink_averagable 1" spacing="0"/>
         <portSpacing port="sink_averagable 2" spacing="0"/>
       </process>
     </operator>
     <!-- Second validation: same inner structure as Validation (2), and likewise without any
          parameters (no local random seed configured here). -->
     <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (3)" width="90" x="380" y="165">
       <!-- Training subprocess: learns a Decision Tree from the "training" port and delivers the model. -->
       <process expanded="true" height="396" width="346">
         <operator activated="true" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (3)" width="90" x="132" y="30"/>
         <connect from_port="training" to_op="Decision Tree (3)" to_port="training set"/>
         <connect from_op="Decision Tree (3)" from_port="model" to_port="model"/>
         <portSpacing port="source_training" spacing="0"/>
         <portSpacing port="sink_model" spacing="0"/>
         <portSpacing port="sink_through 1" spacing="0"/>
       </process>
       <!-- Testing subprocess: applies the model to the "test set" port and passes the
            classification performance to "averagable 1". -->
       <process expanded="true" height="396" width="346">
         <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (3)" width="90" x="45" y="30">
           <list key="application_parameters"/>
         </operator>
         <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (3)" width="90" x="200" y="30">
           <list key="class_weights"/>
         </operator>
         <connect from_port="model" to_op="Apply Model (3)" to_port="model"/>
         <connect from_port="test set" to_op="Apply Model (3)" to_port="unlabelled data"/>
         <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Performance (3)" to_port="labelled data"/>
         <connect from_op="Performance (3)" from_port="performance" to_port="averagable 1"/>
         <portSpacing port="source_model" spacing="0"/>
         <portSpacing port="source_test set" spacing="0"/>
         <portSpacing port="source_through 1" spacing="0"/>
         <portSpacing port="sink_averagable 1" spacing="0"/>
         <portSpacing port="sink_averagable 2" spacing="0"/>
       </process>
     </operator>
     <!-- Top-level wiring. Results 1 and 2 come from the "training" and "averagable 1" ports of
          Validation (2); results 3 and 4 come from the same two ports of Validation (3). -->
     <connect from_op="Retrieve" from_port="output" to_op="Multiply" to_port="input"/>
     <connect from_op="Multiply" from_port="output 1" to_op="Select Attributes (2)" to_port="example set input"/>
     <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
     <connect from_op="Multiply (2)" from_port="output 1" to_op="Validation (2)" to_port="training"/>
     <connect from_op="Multiply (2)" from_port="output 2" to_op="Validation (3)" to_port="training"/>
     <connect from_op="Validation (2)" from_port="training" to_port="result 1"/>
     <connect from_op="Validation (2)" from_port="averagable 1" to_port="result 2"/>
     <connect from_op="Validation (3)" from_port="training" to_port="result 3"/>
     <connect from_op="Validation (3)" from_port="averagable 1" to_port="result 4"/>
     <portSpacing port="source_input 1" spacing="0"/>
     <portSpacing port="sink_result 1" spacing="0"/>
     <portSpacing port="sink_result 2" spacing="0"/>
     <portSpacing port="sink_result 3" spacing="0"/>
     <portSpacing port="sink_result 4" spacing="0"/>
     <portSpacing port="sink_result 5" spacing="0"/>
   </process>
 </operator>
</process>

See my next post for a similar problem.

Answers

  • DavidRaju
    DavidRaju New Altair Community Member
    contd...
    Furthermore, why is the accuracy (performance vector) of the Decision Tree validation operator, when it runs in the same process as a second validation operator (with k-NN), not the same as the accuracy of the same Decision Tree validation operator run on its own?
    Process 1:

    input A - validation operator (DT) - output (performance vector) = X
    input A - validation operator (k-NN) - output (performance vector) = Y

    Process 2:
    input A - validation operator (DT) - output (performance vector) = Z (this should be equal to X)

    where X, Y, and Z are some numeric values.

    The code for both processes is given below.
    Process 1:

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <process version="5.2.008">
     <!-- RapidMiner process ("Process 1" of the post): retrieves //Samples/data/Sonar, filters
          its attributes, and routes the same filtered example set into two X-Validation
          operators, one training a Decision Tree and one training k-NN. -->
     <context>
       <input/>
       <output/>
       <macros/>
     </context>
     <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
       <process expanded="true" height="467" width="681">
         <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
           <parameter key="repository_entry" value="//Samples/data/Sonar"/>
         </operator>
         <!-- Subset filter with invert_selection=true; presumably the listed attributes are
              excluded rather than kept (TODO confirm against the operator documentation). -->
         <operator activated="true" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes (2)" width="90" x="179" y="75">
           <parameter key="attribute_filter_type" value="subset"/>
           <parameter key="attributes" value="attribute_14|attribute_15|attribute_16|attribute_17|attribute_18|attribute_19|attribute_2|attribute_22|attribute_23|attribute_24|attribute_25|attribute_26|attribute_27|attribute_28|attribute_29|attribute_3|attribute_30|attribute_31|attribute_32|attribute_33|attribute_34|attribute_38|attribute_39|attribute_40|attribute_41|attribute_42|attribute_50|attribute_53|attribute_55|attribute_56|attribute_57|attribute_58|attribute_59|attribute_60|attribute_7|attribute_6|attribute_8|attribute_35|attribute_20|attribute_5|attribute_54"/>
           <parameter key="invert_selection" value="true"/>
         </operator>
         <!-- Fans the filtered set out: "output 1" feeds Validation (3), "output 2" feeds Validation (4). -->
         <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply (2)" width="90" x="246" y="210"/>
         <!-- k-NN validation. No parameters are set on this operator, so no local random seed
              is configured here. -->
         <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (4)" width="90" x="380" y="210">
           <!-- Training subprocess: only k-NN is wired to the ports; Decision Tree (4) is
                deactivated (activated="false") and has no connections. -->
           <process expanded="true" height="414" width="346">
             <operator activated="false" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (4)" width="90" x="179" y="300"/>
             <operator activated="true" class="k_nn" compatibility="5.2.008" expanded="true" height="76" name="k-NN" width="90" x="179" y="30"/>
             <connect from_port="training" to_op="k-NN" to_port="training set"/>
             <connect from_op="k-NN" from_port="model" to_port="model"/>
             <portSpacing port="source_training" spacing="0"/>
             <portSpacing port="sink_model" spacing="0"/>
             <portSpacing port="sink_through 1" spacing="0"/>
           </process>
           <!-- Testing subprocess: applies the model to the "test set" port and passes the
                classification performance to "averagable 1". -->
           <process expanded="true" height="414" width="346">
             <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (4)" width="90" x="45" y="30">
               <list key="application_parameters"/>
             </operator>
             <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (4)" width="90" x="200" y="30">
               <list key="class_weights"/>
             </operator>
             <connect from_port="model" to_op="Apply Model (4)" to_port="model"/>
             <connect from_port="test set" to_op="Apply Model (4)" to_port="unlabelled data"/>
             <connect from_op="Apply Model (4)" from_port="labelled data" to_op="Performance (4)" to_port="labelled data"/>
             <connect from_op="Performance (4)" from_port="performance" to_port="averagable 1"/>
             <portSpacing port="source_model" spacing="0"/>
             <portSpacing port="source_test set" spacing="0"/>
             <portSpacing port="source_through 1" spacing="0"/>
             <portSpacing port="sink_averagable 1" spacing="0"/>
             <portSpacing port="sink_averagable 2" spacing="0"/>
           </process>
         </operator>
         <!-- Decision Tree validation. No parameters are set on this operator, so no local
              random seed is configured here. NOTE(review): if the folds are drawn randomly, its
              result may depend on what else runs in the same process; confirm. -->
         <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (3)" width="90" x="380" y="30">
           <!-- Training subprocess: learns a Decision Tree from the "training" port and delivers the model. -->
           <process expanded="true" height="414" width="346">
             <operator activated="true" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (3)" width="90" x="132" y="30"/>
             <connect from_port="training" to_op="Decision Tree (3)" to_port="training set"/>
             <connect from_op="Decision Tree (3)" from_port="model" to_port="model"/>
             <portSpacing port="source_training" spacing="0"/>
             <portSpacing port="sink_model" spacing="0"/>
             <portSpacing port="sink_through 1" spacing="0"/>
           </process>
           <!-- Testing subprocess: applies the model to the "test set" port and passes the
                classification performance to "averagable 1". -->
           <process expanded="true" height="414" width="346">
             <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (3)" width="90" x="45" y="30">
               <list key="application_parameters"/>
             </operator>
             <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (3)" width="90" x="200" y="30">
               <list key="class_weights"/>
             </operator>
             <connect from_port="model" to_op="Apply Model (3)" to_port="model"/>
             <connect from_port="test set" to_op="Apply Model (3)" to_port="unlabelled data"/>
             <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Performance (3)" to_port="labelled data"/>
             <connect from_op="Performance (3)" from_port="performance" to_port="averagable 1"/>
             <portSpacing port="source_model" spacing="0"/>
             <portSpacing port="source_test set" spacing="0"/>
             <portSpacing port="source_through 1" spacing="0"/>
             <portSpacing port="sink_averagable 1" spacing="0"/>
             <portSpacing port="sink_averagable 2" spacing="0"/>
           </process>
         </operator>
         <!-- Top-level wiring. Results 1 and 2 come from the "training" and "averagable 1" ports
              of Validation (3) (Decision Tree); result 3 is "averagable 1" of Validation (4) (k-NN). -->
         <connect from_op="Retrieve" from_port="output" to_op="Select Attributes (2)" to_port="example set input"/>
         <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
         <connect from_op="Multiply (2)" from_port="output 1" to_op="Validation (3)" to_port="training"/>
         <connect from_op="Multiply (2)" from_port="output 2" to_op="Validation (4)" to_port="training"/>
         <connect from_op="Validation (4)" from_port="averagable 1" to_port="result 3"/>
         <connect from_op="Validation (3)" from_port="training" to_port="result 1"/>
         <connect from_op="Validation (3)" from_port="averagable 1" to_port="result 2"/>
         <portSpacing port="source_input 1" spacing="0"/>
         <portSpacing port="sink_result 1" spacing="0"/>
         <portSpacing port="sink_result 2" spacing="0"/>
         <portSpacing port="sink_result 3" spacing="0"/>
         <portSpacing port="sink_result 4" spacing="0"/>
       </process>
     </operator>
    </process>
    process 2:

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <process version="5.2.008">
     <!-- RapidMiner process ("Process 2" of the post): retrieves //Samples/data/Sonar, filters
          its attributes, and runs a single X-Validation that trains a Decision Tree. -->
     <context>
       <input/>
       <output/>
       <macros/>
     </context>
     <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
       <process expanded="true" height="467" width="681">
         <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
           <parameter key="repository_entry" value="//Samples/data/Sonar"/>
         </operator>
         <!-- Only "output 1" of this Multiply is connected in the wiring below. -->
         <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="76" name="Multiply" width="90" x="45" y="165"/>
         <!-- Subset filter with invert_selection=true; presumably the listed attributes are
              excluded rather than kept (TODO confirm against the operator documentation). -->
         <operator activated="true" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes (2)" width="90" x="179" y="165">
           <parameter key="attribute_filter_type" value="subset"/>
           <parameter key="attributes" value="attribute_14|attribute_15|attribute_16|attribute_17|attribute_18|attribute_19|attribute_2|attribute_22|attribute_23|attribute_24|attribute_25|attribute_26|attribute_27|attribute_28|attribute_29|attribute_3|attribute_30|attribute_31|attribute_32|attribute_33|attribute_34|attribute_38|attribute_39|attribute_40|attribute_41|attribute_42|attribute_50|attribute_53|attribute_55|attribute_56|attribute_57|attribute_58|attribute_59|attribute_60|attribute_7|attribute_6|attribute_8|attribute_35|attribute_20|attribute_5|attribute_54"/>
           <parameter key="invert_selection" value="true"/>
         </operator>
         <!-- The only validation in this process. No parameters are set on this operator, so no
              local random seed is configured here. NOTE(review): if the folds are drawn
              randomly, the result need not match the same operator in another process; confirm. -->
         <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (3)" width="90" x="380" y="120">
           <!-- Training subprocess: learns a Decision Tree from the "training" port and delivers the model. -->
           <process expanded="true" height="414" width="346">
             <operator activated="true" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (3)" width="90" x="132" y="30"/>
             <connect from_port="training" to_op="Decision Tree (3)" to_port="training set"/>
             <connect from_op="Decision Tree (3)" from_port="model" to_port="model"/>
             <portSpacing port="source_training" spacing="0"/>
             <portSpacing port="sink_model" spacing="0"/>
             <portSpacing port="sink_through 1" spacing="0"/>
           </process>
           <!-- Testing subprocess: applies the model to the "test set" port and passes the
                classification performance to "averagable 1". -->
           <process expanded="true" height="414" width="346">
             <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (3)" width="90" x="45" y="30">
               <list key="application_parameters"/>
             </operator>
             <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (3)" width="90" x="200" y="30">
               <list key="class_weights"/>
             </operator>
             <connect from_port="model" to_op="Apply Model (3)" to_port="model"/>
             <connect from_port="test set" to_op="Apply Model (3)" to_port="unlabelled data"/>
             <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Performance (3)" to_port="labelled data"/>
             <connect from_op="Performance (3)" from_port="performance" to_port="averagable 1"/>
             <portSpacing port="source_model" spacing="0"/>
             <portSpacing port="source_test set" spacing="0"/>
             <portSpacing port="source_through 1" spacing="0"/>
             <portSpacing port="sink_averagable 1" spacing="0"/>
             <portSpacing port="sink_averagable 2" spacing="0"/>
           </process>
         </operator>
         <!-- Top-level wiring. Results 1 and 2 come from the "training" and "averagable 1" ports
              of Validation (3). -->
         <connect from_op="Retrieve" from_port="output" to_op="Multiply" to_port="input"/>
         <connect from_op="Multiply" from_port="output 1" to_op="Select Attributes (2)" to_port="example set input"/>
         <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Validation (3)" to_port="training"/>
         <connect from_op="Validation (3)" from_port="training" to_port="result 1"/>
         <connect from_op="Validation (3)" from_port="averagable 1" to_port="result 2"/>
         <portSpacing port="source_input 1" spacing="0"/>
         <portSpacing port="sink_result 1" spacing="0"/>
         <portSpacing port="sink_result 2" spacing="0"/>
         <portSpacing port="sink_result 3" spacing="0"/>
       </process>
     </operator>
    </process>

    Why does the result change from process to process with the same input and model?

    My work has halted because of this ambiguity in the results.

    Please clarify.
    Thanking you in anticipation.
  • wessel
    wessel New Altair Community Member
    Random seed?
  • MariusHelf
    MariusHelf New Altair Community Member
    Wessel is right: the X-Validation uses random splits, which differ when it is executed twice in a row. Set a local random seed for the X-Validation to force the same splits for both operators.

    Furthermore, I strongly suggest updating from RapidMiner 5.2.8, which is years old, to the current version 5.3.13.

    If you have any further questions, please come back!

    Best regards,
    Marius
  • DavidRaju
    DavidRaju New Altair Community Member
    Are there any free online books where I can learn more about these parameters, so that I can get better results?
  • wessel
    wessel New Altair Community Member
    There is a Weka book, and I think a RapidMiner book will be published soon.

    weka book:
    http://www.cs.waikato.ac.nz/ml/weka/book.html

    rapid miner book:
    http://rapidminerbook.com/

  • DavidRaju
    DavidRaju New Altair Community Member
    thank you, I will try