Hi everyone,
I've got a question regarding meta-learning (MetaCost, Bagging, Boosting, ...).
Let's assume I have the following process:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: load the Iris sample, split it into two partitions,
  train a MetaCost-wrapped Decision Tree on one partition and apply the
  resulting model to the other.
-->
<process version="5.1.017">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.1.017"
            expanded="true"
            name="Process">
    <process expanded="true" height="662" width="1095">

      <!-- Data source: the Iris example set from the sample repository. -->
      <operator activated="true"
                class="retrieve"
                compatibility="5.1.017"
                expanded="true"
                height="60"
                name="Retrieve"
                width="90"
                x="45"
                y="120">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>

      <!-- Two partitions with ratios 0.9 and 0.1, listed in that order. -->
      <operator activated="true"
                class="split_data"
                compatibility="5.1.017"
                expanded="true"
                height="94"
                name="Split Data"
                width="90"
                x="246"
                y="165">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.9"/>
          <parameter key="ratio" value="0.1"/>
        </enumeration>
      </operator>

      <!--
        Meta-learner wrapping the inner Decision Tree. The 3x3 cost matrix has
        0.0 on the diagonal and 1.0 everywhere else. No iteration count is set
        explicitly in this process.
      -->
      <operator activated="true"
                class="metacost"
                compatibility="5.1.017"
                expanded="true"
                height="76"
                name="MetaCost"
                width="90"
                x="380"
                y="30">
        <parameter key="cost_matrix" value="[0.0 1.0 1.0;1.0 0.0 1.0;1.0 1.0 0.0]"/>
        <process expanded="true" height="662" width="1095">
          <!-- Base learner: Decision Tree with no parameters overridden. -->
          <operator activated="true"
                    class="decision_tree"
                    compatibility="5.1.017"
                    expanded="true"
                    height="76"
                    name="Decision Tree"
                    width="90"
                    x="246"
                    y="30"/>
          <connect from_port="training set" to_op="Decision Tree" to_port="training set"/>
          <connect from_op="Decision Tree" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
        </process>
      </operator>

      <!-- Applies the MetaCost model to the partition not used for training. -->
      <operator activated="true"
                class="apply_model"
                compatibility="5.1.017"
                expanded="true"
                height="76"
                name="Apply Model"
                width="90"
                x="581"
                y="120">
        <list key="application_parameters"/>
      </operator>

      <!--
        Wiring. NOTE(review): "partition 2" feeds the training port while
        "partition 1" is scored. If the partitions follow the ratio order above,
        the model is trained on the 0.1 share and applied to the 0.9 share;
        confirm this is intended.
      -->
      <connect from_op="Retrieve" from_port="output" to_op="Split Data" to_port="example set"/>
      <connect from_op="Split Data" from_port="partition 1" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Split Data" from_port="partition 2" to_op="MetaCost" to_port="training set"/>
      <connect from_op="MetaCost" from_port="model" to_op="Apply Model" to_port="model"/>

      <!-- Process outputs: result 1 = model, result 2 = labelled data. -->
      <connect from_op="Apply Model" from_port="labelled data" to_port="result 2"/>
      <connect from_op="Apply Model" from_port="model" to_port="result 1"/>

      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
In the result view you see 10 different decision trees (based on the 10 iterations in MetaCost). My question is, which of the 10 different trees will be used as a model for classification purposes? The last one?
If so, am I correct that each model evolves from its predecessor tree, so that the last one is the best?
Cheers, Q-Dog