Hello,
I am trying to do a qualitative comparison between two models of the same type (namely Naïve Bayes) but with different input attributes. I want to find the examples on which the two models' predictions differ and then plot the distribution of those disagreements. Is there an easy way to do this in RapidMiner?
So far I have tried different techniques such as Join and Append, but none of them does what I want.
I have included the XML code of my process.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner Studio process (version 8.0.001).

  Purpose: train two Naive Bayes models on the same training partition, one
  without the attributes race, income and tuition and one with them, apply
  both to the same hold-out partition and measure each with a Performance
  operator.

  Results delivered by this process:
    result 1: performance vector of the model trained WITH race/income/tuition
    result 2: performance vector of the model trained WITHOUT them
    result 3: scored hold-out example set of the model WITH them
    result 4: scored hold-out example set of the model WITHOUT them
-->
<process version="8.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
    <process expanded="true">

      <!-- Load the example set from the repository (path is relative to this process). -->
      <operator activated="true" class="retrieve" compatibility="8.0.001" expanded="true" height="68" name="Retrieve speed_dating_assignment_processed" width="90" x="45" y="85">
        <parameter key="repository_entry" value="../data/speed_dating_assignment_processed"/>
      </operator>

      <!-- Drop the attributes at positions 78 through 174. -->
      <operator activated="true" class="remove_attribute_range" compatibility="8.0.001" expanded="true" height="82" name="Delete useless attr" width="90" x="45" y="187">
        <parameter key="first_attribute" value="78"/>
        <parameter key="last_attribute" value="174"/>
      </operator>

      <!--
        Subset filter with invert_selection=true: every attribute EXCEPT the
        listed ones (match, gender) is kept.
        NOTE(review): the "attribute" key presumably only applies to the
        "single" filter type and is unused here; confirm.
        NOTE(review): if "match" is the label the models should predict, this
        step removes it; verify against the stored data's attribute roles.
      -->
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Delete attr with too much data" width="90" x="45" y="340">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value="match"/>
        <parameter key="attributes" value="|match|gender"/>
        <parameter key="invert_selection" value="true"/>
      </operator>

      <!-- 80/20 split; the fixed local seed (500) makes the partitions reproducible. -->
      <operator activated="true" class="split_data" compatibility="8.0.001" expanded="true" height="103" name="Split Data" width="90" x="45" y="493">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.8"/>
          <parameter key="ratio" value="0.2"/>
        </enumeration>
        <parameter key="use_local_random_seed" value="true"/>
        <parameter key="local_random_seed" value="500"/>
      </operator>

      <!--
        Hold-out branch (fed from "partition 2"): removes race, income and
        tuition. Both of its outputs are used below: "example set output"
        (filtered) and "original" (presumably the unfiltered input passed
        through).
      -->
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Delete sensitive attr (2)" width="90" x="179" y="595">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="race|income|tuition"/>
        <parameter key="invert_selection" value="true"/>
      </operator>

      <!-- Training branch (fed from "partition 1"): same filter as the hold-out branch. -->
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Delete sensitive attr" width="90" x="179" y="442">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="race|income|tuition"/>
        <parameter key="invert_selection" value="true"/>
      </operator>

      <!-- Model trained on the filtered training data (no race/income/tuition). -->
      <operator activated="true" class="naive_bayes" compatibility="8.0.001" expanded="true" height="82" name="Naive Bayes" width="90" x="313" y="442"/>
      <operator activated="true" class="apply_model" compatibility="8.0.001" expanded="true" height="82" name="Apply Model (3)" width="90" x="447" y="493">
        <list key="application_parameters"/>
      </operator>
      <operator activated="true" class="performance" compatibility="8.0.001" expanded="true" height="82" name="Perf. No sensitive" width="90" x="581" y="493"/>

      <!-- Model trained on the "original" output of the training-branch filter. -->
      <operator activated="true" class="naive_bayes" compatibility="8.0.001" expanded="true" height="82" name="Naive Bayes (2)" width="90" x="313" y="289"/>
      <operator activated="true" class="apply_model" compatibility="8.0.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="447" y="340">
        <list key="application_parameters"/>
      </operator>
      <operator activated="true" class="performance" compatibility="8.0.001" expanded="true" height="82" name="Perf. With sensitive" width="90" x="581" y="340"/>

      <!-- Preprocessing chain. -->
      <connect from_op="Retrieve speed_dating_assignment_processed" from_port="output" to_op="Delete useless attr" to_port="example set input"/>
      <connect from_op="Delete useless attr" from_port="example set output" to_op="Delete attr with too much data" to_port="example set input"/>
      <connect from_op="Delete attr with too much data" from_port="example set output" to_op="Split Data" to_port="example set"/>

      <!-- Partition 1 is used for training, partition 2 for scoring. -->
      <connect from_op="Split Data" from_port="partition 1" to_op="Delete sensitive attr" to_port="example set input"/>
      <connect from_op="Split Data" from_port="partition 2" to_op="Delete sensitive attr (2)" to_port="example set input"/>

      <!-- Hold-out data: filtered copy to the "no sensitive" model, original copy to the "with sensitive" model. -->
      <connect from_op="Delete sensitive attr (2)" from_port="example set output" to_op="Apply Model (3)" to_port="unlabelled data"/>
      <connect from_op="Delete sensitive attr (2)" from_port="original" to_op="Apply Model (2)" to_port="unlabelled data"/>

      <!-- Training data: filtered copy to Naive Bayes, original copy to Naive Bayes (2). -->
      <connect from_op="Delete sensitive attr" from_port="example set output" to_op="Naive Bayes" to_port="training set"/>
      <connect from_op="Delete sensitive attr" from_port="original" to_op="Naive Bayes (2)" to_port="training set"/>

      <!-- Scoring and evaluation. -->
      <connect from_op="Naive Bayes" from_port="model" to_op="Apply Model (3)" to_port="model"/>
      <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Perf. No sensitive" to_port="labelled data"/>
      <connect from_op="Naive Bayes (2)" from_port="model" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Perf. With sensitive" to_port="labelled data"/>

      <!--
        Deliver results. Without these connections nothing reaches the
        process result ports, so a run shows no output. The scored example
        sets are exposed alongside the performance vectors so that the two
        models' predictions can be inspected side by side.
      -->
      <connect from_op="Perf. With sensitive" from_port="performance" to_port="result 1"/>
      <connect from_op="Perf. No sensitive" from_port="performance" to_port="result 2"/>
      <connect from_op="Perf. With sensitive" from_port="example set" to_port="result 3"/>
      <connect from_op="Perf. No sensitive" from_port="example set" to_port="result 4"/>

      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
    </process>
  </operator>
</process>